-
Notifications
You must be signed in to change notification settings - Fork 9
/
vector.py
71 lines (52 loc) · 2.36 KB
/
vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
'''
Created on Jun 14, 2011
@author: kykamath
'''
import math, random
from operator import itemgetter
class Vector(dict):
    '''
    Sparse numeric vector stored as a dict mapping dimension -> value.

    A dimension that is absent from the dict is treated as 0 by the
    arithmetic and dot-product operations. Only dict methods that exist on
    both Python 2 and Python 3 are used.
    '''

    def __init__(self, vectorInitialValues=None):
        '''
        Create a vector holding a copy of the entries in vectorInitialValues.

        vectorInitialValues: optional mapping of dimension -> value. When it
        is omitted (or None) the vector starts empty.
        '''
        # None replaces the former `{}` default so that no mutable object is
        # shared between calls.
        super(Vector, self).__init__()
        if vectorInitialValues is not None:
            self.update(vectorInitialValues)

    @property
    def dimension(self):
        '''Number of dimensions explicitly stored in the vector.'''
        return len(self)

    def __add__(self, otherVector):
        '''
        Return a new Vector equal to self + otherVector.

        Neither operand is modified. Dimensions present in only one operand
        are carried over unchanged.
        '''
        sumVector = Vector(self)
        for k, v in otherVector.items():
            if k in sumVector:
                sumVector[k] += v
            else:
                sumVector[k] = v
        return sumVector

    def __sub__(self, otherVector):
        '''
        Return a new Vector equal to self - otherVector.

        Neither operand is modified. A dimension that exists only in
        otherVector is treated as 0 in self, so it appears negated in the
        result.
        '''
        diffVector = Vector(self)
        for k, v in otherVector.items():
            if k in diffVector:
                diffVector[k] -= v
            else:
                # 0 - v: the dimension is implicitly zero on the left side.
                diffVector[k] = -v
        return diffVector

    def dot(self, otherVector):
        '''
        Return the dot product of self and otherVector.

        otherVector may be any dict-like mapping of dimension -> value.
        Returns 0 when the operands share no dimension.
        '''
        # A dimension missing from either operand contributes 0 to the sum,
        # so it is enough to walk the entries of the smaller operand.
        if len(otherVector) < len(self):
            smaller, larger = otherVector, self
        else:
            smaller, larger = self, otherVector
        return sum(v * larger.get(k, 0) for k, v in smaller.items())

    def dotWithSmallerVectorWithSubsetDimensions(self, smallerVector):
        '''
        Return the dot product with smallerVector, visiting only its keys.

        Every dimension of smallerVector must also be present in self;
        otherwise a KeyError is raised.
        '''
        return sum(self[k] * smallerVector[k] for k in smallerVector)

    def divideByScalar(self, scalar):
        '''
        Divide every stored value by scalar in place and return self.

        The `/=` operator is applied to each value. Raises ZeroDivisionError
        if scalar is 0 and the vector is not empty.
        '''
        # Only the values of existing keys are replaced, so iterating the
        # dict while assigning is safe.
        for k in self:
            self[k] /= scalar
        return self

    def mod(self):
        '''Return the Euclidean length (L2 norm) of the vector.'''
        return math.sqrt(sum(x * x for x in self.values()))

    def getNormalizedVector(self):
        '''
        Return a new Vector with every value divided by self.mod().

        A vector whose length is 0 cannot be scaled, so an unchanged copy is
        returned in that case.
        '''
        modValue = self.mod()
        if modValue == 0:
            return Vector(self)
        normalizedVector = Vector()
        for k, v in self.items():
            normalizedVector[k] = v / modValue
        return normalizedVector

    def cosineSimilarity(self, otherVector):
        '''
        Return the dot product of the normalized forms of self and
        otherVector. otherVector must provide getNormalizedVector().
        '''
        return self.getNormalizedVector().dot(otherVector.getNormalizedVector())

    def getTopDimensions(self, numberOfFeatures):
        '''
        Return a plain dict with the numberOfFeatures entries that have the
        largest values.
        '''
        rankedItems = sorted(self.items(), key=itemgetter(1), reverse=True)
        return dict(rankedItems[:numberOfFeatures])

    @staticmethod
    def getMeanVector(iterable):
        '''
        Return a new Vector holding the per-dimension mean of the vectors in
        iterable.

        The sum of each dimension is divided by the total number of vectors,
        so a vector that lacks a dimension counts as 0 for it. The input
        vectors are not modified. An empty iterable yields an empty Vector.
        '''
        vectorsCollection = list(iterable)
        # Accumulate into one Vector instead of copying the running sum for
        # every input vector.
        total = Vector()
        for vector in vectorsCollection:
            for k, v in vector.items():
                total[k] = total.get(k, 0) + v
        return total.divideByScalar(float(len(vectorsCollection)))
class VectorGenerator:
    '''
    Namespace for factory helpers that build Vector instances.
    '''

    @staticmethod
    def getRandomGaussianUnitVector(dimension, mu, sigma):
        '''
        Build a vector over the integer dimensions 0 .. dimension-1 whose
        values are drawn with random.normalvariate(mu, sigma), and return the
        result of calling getNormalizedVector() on it.

        dimension: number of components to generate.
        mu, sigma: mean and standard deviation passed to
        random.normalvariate.
        '''
        vector = Vector()
        # range() instead of xrange() so the loop also runs on Python 3.
        for i in range(dimension):
            vector[i] = random.normalvariate(mu, sigma)
        return vector.getNormalizedVector()
if __name__ == '__main__':
    # Manual smoke run: generate one normalized 10-component vector whose
    # values are drawn with mu=0 and sigma=1. The result is discarded.
    VectorGenerator.getRandomGaussianUnitVector(dimension=10, mu=0, sigma=1)