-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
63 lines (55 loc) · 1.38 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
from math import sqrt
from matplotlib import cm
from sklearn.decomposition import PCA
def readDM(dm_file):
    """Load a whitespace-separated vector file into a dictionary.

    Each non-blank line is expected to hold a row label followed by the
    numeric components of that row's vector.

    Args:
        dm_file: Path of the file to read.

    Returns:
        A dict mapping each row label (str) to a 1-D numpy array of floats.
        If a label occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    dm_dict = {}
    # The ``with`` block closes the file on exit; no explicit close() needed.
    with open(dm_file) as f:
        # Iterate lazily instead of materialising every line with readlines().
        for line in f:
            items = line.split()
            if not items:
                # Blank or whitespace-only lines carry no row; skip them
                # rather than failing on items[0].
                continue
            # key = row label, value = vector
            dm_dict[items[0]] = np.array([float(i) for i in items[1:]])
    return dm_dict
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Args:
        v1: First vector (any sequence accepted by ``np.dot``).
        v2: Second vector.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. Returns 0.0 when the vectors differ in length or
        when either vector has zero norm (including two empty vectors),
        since the cosine is undefined in those cases.
    """
    if len(v1) != len(v2):
        return 0.0
    norm_product = sqrt(np.dot(v1, v1)) * sqrt(np.dot(v2, v2))
    if norm_product == 0:
        # A zero vector has no direction; dividing would yield NaN, which
        # breaks any later sorting by similarity. Use the same fallback as
        # the length-mismatch case.
        return 0.0
    return np.dot(v1, v2) / norm_product
def neighbours(dm_dict,vec,n):
    """Return the labels of the ``n`` vectors most similar to ``vec``.

    Args:
        dm_dict: Mapping from label to vector.
        vec: Query vector compared against every vector in ``dm_dict``.
        n: Maximum number of labels to return.

    Returns:
        A list of at most ``n`` labels, ordered by decreasing cosine
        similarity to ``vec``. Labels with equal similarity keep the
        iteration order of ``dm_dict``.
    """
    # Score every entry against the query vector.
    scores = {label: cosine_similarity(vec, candidate)
              for label, candidate in dm_dict.items()}

    # Best matches first.
    ranked = sorted(scores, key=scores.get, reverse=True)

    closest = []
    for label in ranked:
        if len(closest) >= n:
            break
        closest.append(label)
    return closest
def run_PCA(dm_dict, words, savefile):
    """Project the vectors of ``words`` to 2-D with PCA and save a figure.

    Args:
        dm_dict: Mapping from word to vector.
        words: Iterable of words to plot; each must be a key of ``dm_dict``.
        savefile: Destination passed to the figure's ``savefig``.

    Raises:
        KeyError: If a word is not present in ``dm_dict``.
    """
    labels = list(words)
    vectors = [dm_dict[word] for word in labels]

    # Fit the two-component model, then project the same vectors with it.
    model = PCA(n_components=2).fit(vectors)
    points_2d = model.transform(vectors)

    # NOTE(review): make_figure is defined outside this view; it presumably
    # returns a matplotlib Figure, since get_axes() and savefig() are called
    # on the result -- confirm.
    figure = make_figure(points_2d, labels)
    # Hide the figure's second axes (presumably a colour-bar axis -- confirm
    # against make_figure) before writing the image.
    figure.get_axes()[1].set_visible(False)
    figure.savefig(savefile)