In [231]:
from load_dataset_module import UserPreference
from math import sqrt
class Similarity:
    
    def __init__(self,similarityMetric='euclidean',user1={'A':1},user2={'A':2}):
        ''' Init object '''
        self.similarityMetric = similarityMetric
        self.user1 = user1
        self.user2 = user2
        
    def setUser1(self,user):
        ''' Sets dictionary containing books(ISNB) and corresponding rating for user1 '''
        self.user1 = user
        
    def getUser1(self):
        ''' Returns dictionary containing books(ISBN) and correspondin rating for user1 '''
        return self.user1
    
    def setUser2(self,user):
        ''' Sets dictionary containing books(ISNB) and corresponding rating for user2 '''
        self.user2 = user
        
    def getUser2(self):
        ''' Returns dictionary containing books(ISBN) and correspondin rating for user2 '''
        return self.user2 
    
    def setSimilarityMetric(self,similarityMetric):
        ''' Sets similarity metric '''
        self.similarityMetric = similarityMetric
        
    def getSimilarityMetric(self):
        ''' Returns similarity metric '''
        return self.similarityMetric

    def commonality(self):
        ''' Removes any uncommon books from user dictionaries '''
        bookAndRatings1 = self.user1 
        bookAndRatings2 = self.user2
        s1 = set(self.user1)
        s2 = set(self.user2)
        sDiff = s1 ^ s2 # Symmetric difference
        
        for key in sDiff: # Removes any non-common books 
            if key in bookAndRatings1:
                del bookAndRatings1[key]
            if key in bookAndRatings2:
                del bookAndRatings2[key]
            
        self.user1 = bookAndRatings1
        self.user2 = bookAndRatings2
        
    def sortedValues(self,user):
        ''' Items are sorted by key then put into a list containing their values in the same order '''
        return dict(sorted(user.items())).values()

                        
    def getEuclideanMetric(self): 
        ''' Returns the inverse of the sum of euclidean distance'''
        # euclidean = Sqrt of Sum of the difference in ratings squared. 
        # return 1 / (1 + (euclidean))
        
        self.commonality() # Removes any non-common ratings       
        sort1 = self.sortedValues(self.user1) # Sorts book IDs in alphabetical order
        sort2 = self.sortedValues(self.user2)
        
        subtractPower = lambda a,b : (a - b)**2 # Applies the euclidean distance formula to the values
        
        euclidean = sqrt(sum(map(subtractPower,sort1,sort2)))
        similarityMetric = 1 / (1+(euclidean)) # Calculates the inverse of the similarity metric
        return similarityMetric

    
    def getCosineMetric(self):
        ''' Returns cosine similarity '''
        # Ranges from -1 (Opposite) to 1 (the same) with zero suggesting no correlation
        self.commonality() # Removes any non-common ratings    
        sort1 = self.sortedValues(self.user1)# Sorts book IDs in alphabetical order
        sort2 = self.sortedValues(self.user2)
        
        multiply = lambda a,b : a * b # Returns the numerator for our cosine calculation
        multiplications = sum(list(map(multiply,sort1,sort2)))

        square = lambda num : num ** 2 # return the denominator for our cosine calcuation
        sumSquares1 = sum(map(square,sort1))
        sumSquares2 = sum(map(square,sort2))
        
        return multiplications / (sqrt(sumSquares1)*sqrt(sumSquares2))

        
    def getPearson(self):
        ''' Returns the Pearson Coefficient '''
        self.commonality() # Removes any non-common ratings
        '''THIS PART KEEPS GETTING REPEATED THUS SHOULD MAKE NEW FUNCTION'''
        sort1 = self.sortedValues(self.user1)
        sort2 = self.sortedValues(self.user2)
        
        mean = lambda x : sum(x)/len(x) # Returns the mean 
        meanUser1 = mean(sort1)
        meanUser2 = mean(sort2)
        
        subMean = lambda x,y : (x - meanUser1) * (y - meanUser2)
        numerator = sum(list(map(subMean,sort1,sort2))) # Returns numerator
        
        diffSqr1 = lambda x : (x - meanUser1)**2
        diffSqr2 = lambda y : (y - meanUser2)**2
        user1Diff = sum(list(map(diffSqr1,sort1)))
        user2Diff = sum(list(map(diffSqr2,sort2)))
        denominator = sqrt(user1Diff*user2Diff) # Returns denominator
        
        return numerator/denominator
           
    def getSpearmanCorr(self):
        
        pass
    def similarity5():
        pass
    def similarityBooks():
        pass
    

def test():    
    # Instantiate a user_preference object 
    # This will later be done in the test module, but for now it will be used to test
    up = UserPreference()    
    Dictionary = up.userPreference()

    booksAndRatings1 = {} # List containing books and their relevant ratings
    booksAndRatings2 = {}

    for book in Dictionary['276762']:
        booksAndRatings1[book] = list(Dictionary['276762'][book])[-1]
    for book in Dictionary['4114']:
        booksAndRatings2[book] = list(Dictionary['4114'][book])[-1]

    s1 = set(booksAndRatings1)
    s2 = set(booksAndRatings2)
    sDiff = s1 ^ s2

    for key in sDiff:
        if key in booksAndRatings1:
            del booksAndRatings1[key]
        if key in booksAndRatings2:
            del booksAndRatings2[key]

    print(booksAndRatings1)
    print(booksAndRatings2)

# Instantiate similarity object
sim = Similarity()
sim.setUser1({'abc':123,'cba':42,'bbc':4})
sim.setUser2({'abc':123,'cbc':40,'bbc':4})
sim.getPearson()


1.0

In [None]:
lists = ['"276725"','"276725"','"276725"']
[s.strip('"') for s in lists]

In [69]:
BooksAndRatings1 = {'barry':100,'abc':2,'abb':3,'cba':7,'bbc':4}
BooksAndRatings2 = {'abc':5,'abg':3,'cbaa':7,'barry':100,'bbc':7}

s1=set(BooksAndRatings1)
s2=set(BooksAndRatings2)
symmetricDiff = s1 ^ s2 # All elements that only appear in set a or only appear in set b

for key in symmetricDiff:
    if key in BooksAndRatings1:
        del BooksAndRatings1[key]
    if key in BooksAndRatings2:
        del BooksAndRatings2[key]
    
print(BooksAndRatings1)
print(BooksAndRatings2)

# Euclidean distance

def summation(dic1,dic2):
    summation = 0
    for i in dic1:
        for j in dic2:
            if i == j:
                summation+= dic1[i]+dic[j]
            else:
                continue
    return summation
    


{'barry': 100, 'abc': 2, 'bbc': 4}
{'abc': 5, 'barry': 100, 'bbc': 7}
218


In [107]:
l1 = {'A':2,'B':3,'C':5}

mean = lambda ratings : sum(ratings.values())/len(ratings)
mean(l1)

3.3333333333333335

In [135]:
sqr = lambda num : num + num
l1 = (1,2,3,4,5)
m = (map(sqr,l1))
print(m)

<map object at 0x11e2568d0>


In [129]:
# Python program to demonstrate working
# of map.
  
# Return double of n
'''def addition(n):
    return n + n'''
lambda addition : addition + addition

# We double all numbers using map()
numbers = (1, 2, 3, 4)
result = map(addition, numbers)
print(list(result))

[2, 4, 6, 8]


In [131]:
add = lambda addition : addition + addition

numbers = (1,2,3,4)

result = map(add,numbers)
print(list(result))

[2, 4, 6, 8]


In [146]:
sqr = lambda num : num **2
l1 = {'A':1,'B':2,'C':3}
m = sum(map(sqr,l1.values()))
print(m)

14


In [179]:
l1 = {'A':23,'C':43,'B':5} 
l2 = {'B':23,'A':4,'C':22}
sort1 = dict(sorted(l1.items())).values()
sort2 = dict(sorted(l2.items())).values()
multiply = lambda a,b : a * b
multiplications = sum(list(map(multiply,sort1,sort2)))
print(multiplications)
dict(sort2)

1153


TypeError: cannot convert dictionary update sequence element #0 to a sequence

In [211]:
l1 = {'A':2,'B':2}
l2 = {'A':5,'B':5}
l3 = [123,4]
l4 = [123,4]
subtractPower = lambda a,b : (a-b)**2
sqrt(sum(map(subtractPower,l3,l4)))

0.0

In [237]:
l1 = [5,3,4,2,4]
l2 = [6,3,2,3,1]

mean1 = 43
mean2 = 41

calc = lambda a,mean : a - mean
list(map())

TypeError: unsupported operand type(s) for -: 'list' and 'int'