In [38]:
from math import sqrt

In [39]:
# The dataset: a nested dictionary mapping each reviewer's name to a
# dictionary of {movie title: numeric rating}.
# Not every reviewer has rated every movie (e.g. 'Toby' has only three
# entries), so code that compares two reviewers has to work on the titles
# they have in common.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0,
             'Superman Returns': 4.0}
}

In [40]:
#Euclidean distance score (index for finding similarity between people)

def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    Only items rated by both people are considered. The score is
    1 / (1 + d), where d is the Euclidean distance between the two
    people's ratings on those shared items, so identical ratings give 1.0
    and larger disagreements push the score towards 0.

    Args:
        prefs: dict mapping a person to a dict of {item: rating}.
        person1: key of the first person in ``prefs``.
        person2: key of the second person in ``prefs``.

    Returns:
        0 if the two people share no rated items, otherwise the score
        described above.
    """
    # Shared items, kept in the order they appear in person1's ratings.
    shared = [title for title in prefs[person1] if title in prefs[person2]]

    if not shared:
        return 0

    squared_gap_total = sum(
        (prefs[person1][title] - prefs[person2][title]) ** 2
        for title in shared
    )

    return 1 / (1 + sqrt(squared_gap_total))
            
            

In [41]:
sim_distance(critics, 'Lisa Rose', 'Gene Seymour')

0.29429805508554946

In [42]:
def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation of two people's shared ratings.

    Only items rated by both people are considered.

    Args:
        prefs: dict mapping a person to a dict of {item: rating}.
        person1: key of the first person in ``prefs``.
        person2: key of the second person in ``prefs``.

    Returns:
        0 when the correlation is undefined: the people share no items,
        or either person's ratings on the shared items are all the same
        value (zero variance; this includes the single-shared-item case).
        Otherwise the correlation coefficient as a float.
    """
    # Shared items, kept in the order they appear in person1's ratings.
    shared = [item for item in prefs[person1] if item in prefs[person2]]

    n = len(shared)
    if n == 0:
        return 0

    xs = [prefs[person1][it] for it in shared]
    ys = [prefs[person2][it] for it in shared]

    # A constant rater has zero variance, so the correlation is undefined.
    # Check this exactly instead of relying on the floating-point formula
    # below: for values such as 0.1 the rounding error in
    # sumSq - sum**2/n can come out slightly negative (sqrt would raise
    # ValueError) or slightly positive (yielding a meaningless ratio).
    if all(x == xs[0] for x in xs) or all(y == ys[0] for y in ys):
        return 0

    # add the preferences
    sum1 = sum(xs)
    sum2 = sum(ys)

    # sum of squares
    sum1Sq = sum([pow(x, 2) for x in xs])
    sum2Sq = sum([pow(y, 2) for y in ys])

    # sum of products
    pSum = sum([x * y for x, y in zip(xs, ys)])

    num = pSum - (sum1 * sum2 / n)
    radicand = (sum1Sq - pow(sum1, 2) / n) * (sum2Sq - pow(sum2, 2) / n)

    # Guard against a zero or rounding-induced negative radicand before
    # taking the square root.
    if radicand <= 0:
        return 0

    return num / sqrt(radicand)

            

In [43]:
sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')

0.39605901719066977

In [44]:
def topMatches(prefs, person, n = 5, similarity = sim_pearson):
    """Return the people most similar to ``person``.

    Args:
        prefs: dict mapping a person to a dict of {item: rating}.
        person: key in ``prefs`` to find matches for.
        n: maximum number of matches to return.
        similarity: callable ``(prefs, a, b) -> score`` used to compare
            ``person`` with every other key in ``prefs``.

    Returns:
        A list of at most ``n`` ``(score, other_person)`` tuples, sorted
        from highest to lowest (tuple comparison, so equal scores are
        ordered by the person key).
    """
    ranked = []
    for candidate in prefs:
        if candidate != person:
            ranked.append((similarity(prefs, person, candidate), candidate))

    return sorted(ranked, reverse=True)[:n]



In [45]:
topMatches(critics, 'Toby', n=3)

[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig')]

In [48]:
def getRecommendation(prefs, person, similarity = sim_pearson):
    """Recommend items for ``person`` using similarity-weighted ratings.

    For every other person whose similarity to ``person`` is positive,
    each item that ``person`` has not rated (or has rated 0) contributes
    ``rating * similarity`` to that item's total. The predicted score for
    an item is its total divided by the sum of the similarities of the
    people who rated it.

    Args:
        prefs: dict mapping a person to a dict of {item: rating}.
        person: key in ``prefs`` to produce recommendations for.
        similarity: callable ``(prefs, a, b) -> score``.

    Returns:
        A list of ``(predicted_score, item)`` tuples sorted from highest
        to lowest. The list is empty when nobody with a positive
        similarity has rated an item that ``person`` is missing.
    """
    totals = {}
    sumSims = {}
    for other in prefs:
        # don't compare the person to themselves
        if other == person:
            continue
        sim = similarity(prefs, person, other)

        # ignore people who are not positively similar
        if sim <= 0:
            continue

        for item in prefs[other]:
            # only score items the person hasn't rated yet
            if item not in prefs[person] or prefs[person][item] == 0:
                totals[item] = totals.get(item, 0) + prefs[other][item] * sim
                sumSims[item] = sumSims.get(item, 0) + sim

    # Build the ranking once, after all contributions are in. Doing this
    # inside the loop left `rankings` unbound whenever every other person
    # was skipped, and rebuilt the list needlessly on each iteration.
    rankings = [(total / sumSims[item], item) for item, total in totals.items()]

    rankings.sort(reverse=True)
    return rankings
            
    
        

In [49]:
getRecommendation(critics, 'Toby')

[(3.3477895267131017, 'The Night Listener'),
 (2.8325499182641614, 'Lady in the Water'),
 (2.530980703765565, 'Just My Luck')]