/
manager.py
120 lines (99 loc) · 3.43 KB
/
manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import csv
import sys
import Levenshtein
class SongData:
def __init__(self, lis):
if lis[1] == "" or lis[2] == "":
raise ValueError(f"Invalid data: Not found title or artist: {lis}")
self.program = lis[0]
self.title = lis[1]
self.artist = lis[2]
self.lyricist = lis[3]
self.composer = lis[4]
self.arranger = lis[5]
self.genre = lis[6]
def __str__(self):
return f"{self.program} {self.title} {self.artist}"
def compare(data1, data2):
PRG_GRAVITY = 10
TITLE_GRAVITY = 100
ARTIST_GRAVITY = 10
LYRICIST_GRAVITY = 1
COMPOSER_GRAVITY = 1
ARRANGER_GRAVITY = 1
GENRE_GRAVITY = 0.1
if data1.program == "" or data2.program == "":
PRG_GRAVITY = 0
if data1.lyricist == "" or data2.lyricist == "":
LYRICIST_GRAVITY = 0
if data1.composer == "" or data2.composer == "":
COMPOSER_GRAVITY = 0
if data1.arranger == "" or data2.arranger == "":
ARRANGER_GRAVITY = 0
prgDistance = Levenshtein.ratio(data1.program, data2.program)
titleDistance = Levenshtein.ratio(data1.title, data2.title)
artistDistance = Levenshtein.ratio(data1.artist, data2.artist)
lyricistDistance = Levenshtein.ratio(data1.lyricist, data2.lyricist)
composerDistance = Levenshtein.ratio(data1.composer, data2.composer)
arrangerDistance = Levenshtein.ratio(data1.arranger, data2.arranger)
genreDistance = Levenshtein.distance(data1.genre, data2.genre)
return (
prgDistance * PRG_GRAVITY +
titleDistance * TITLE_GRAVITY +
artistDistance * ARTIST_GRAVITY +
lyricistDistance * LYRICIST_GRAVITY +
composerDistance * COMPOSER_GRAVITY +
arrangerDistance * ARRANGER_GRAVITY +
genreDistance * GENRE_GRAVITY
) / (
PRG_GRAVITY +
TITLE_GRAVITY +
ARTIST_GRAVITY +
LYRICIST_GRAVITY +
COMPOSER_GRAVITY +
ARRANGER_GRAVITY +
GENRE_GRAVITY
) * 100
def sameCompare(filename):
stacked = []
compares = []
with open(filename) as f:
reader = csv.reader(f)
for line in reader:
tmp = SongData(line)
for x in stacked:
distance = compare(tmp, x)
compares.append((distance, str(tmp), str(x)))
stacked.append(tmp)
compares.sort(reverse=True, key=lambda x: x[0])
outputCompares(compares)
def twoCompare(filename1, filename2):
stacked = []
compares = []
with open(filename1) as f:
reader = csv.reader(f)
for line in reader:
stacked.append(SongData(line))
with open(filename2) as f:
reader = csv.reader(f)
for line in reader:
tmp = SongData(line)
for x in stacked:
distance = compare(tmp, x)
compares.append((distance, str(tmp), str(x)))
compares.sort(reverse=True, key=lambda x: x[0])
outputCompares(compares)
def outputCompares(compares):
for c in compares:
print("{:.4g}% - \"{}\" <-> \"{}\"".format(c[0], c[1], c[2]))
if __name__ == "__main__":
arglen = len(sys.argv)
if arglen == 2: # compare same file
sameCompare(sys.argv[1])
elif arglen == 3: # compare two file
if sys.argv[1] == sys.argv[2]:
sameCompare(sys.argv[1])
else:
twoCompare(sys.argv[1], sys.argv[2])
else:
print(f"Usage: python3 {sys.argv[0]} csvfile1 [option:csvfile2]")