In [69]:
from math import sqrt

In [170]:
## Create a dictionary called critics from critics.csv file.
## The keys are the names of the critics, and the values are dictionaries containing
## containing the movie names as the keys and the ratings as the values
critics = {'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane':3.5,'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0},
'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0,'You, Me and Dupree': 3.5},
'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, 'Superman Returns': 4.0, 'The Night Listener': 4.0},
'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5, 'Superman Returns': 4.0, 'The Night Listener': 3.0,'You, Me and Dupree': 2.5},
'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0,'You, Me and Dupree': 2.0},
'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Superman Returns': 5.0, 'The Night Listener': 3.0,'You, Me and Dupree': 3.5},
'Toby': {'Snakes on a Plane': 4.5,'Superman Returns': 4.0,'You, Me and Dupree': 4.0}}       

In [171]:
# Return a distance-based similarity score for two individuals
# params: critics dictionary, person 1's name, person 2's name 
def sim_distance(prefs,person1,person2):
    # Get the list of shared items (mutually rated)
    si={}
    # start code
    for item in prefs[person1]:
        if item in prefs[person2]:
            si[item] = 1
    # end code
    # if they have no ratings in common, return 0
    if len(si) == 0:
        return(0)
    # Add up the squares of all the differences of the ratings for the shared items
    sum_of_squared_difference = sum([(prefs[person1][item] - prefs[person2][item])**2 for item in prefs[person1] if item in prefs[person2]]) ### write code here (use list comprehension)
    return(1/(1 + sum_of_squared_difference))

In [172]:
print(sim_distance(critics,'Toby','Jack Matthews'))

0.4


In [175]:
# Returns the Pearson correlation coefficient for two individuals
# first parameter is the critics dictionary

def sim_pearson(prefs,person1,person2):
    # Get the list of mutually rated items
    si={}
    # start code 
    for item in prefs[person1]:
        if item in prefs[person2]:
            si[item] = 1
    # end code
    # if they have no ratings in common, return 0
    if len(si) == 0:
        return(0)
    
    #find the number of common elements
    n = len(si)
    
    #Add up all the ratings for each individual
    sum1 = sum([prefs[person1][item] for item in si])   # write code for person1
    sum2 = sum([prefs[person2][item] for item in si])   # write code for person2
    
    # Sum up the squares of the ratings for each person
    sum1Sq = sum([(prefs[person1][item])**2 for item in si])  # write code for person1
    sum2Sq = sum([(prefs[person2][item])**2 for item in si])  # write code for person2
    
    # Sum up the products of the ratings for the two individuals 
    # rating person 1 x rating person 2 for all the common ratings and add it up
    pSum = sum([prefs[person1][item]*prefs[person2][item] for item in si])      # write code here 
    
    #Calculate Pearson score
    numerator = pSum - (sum1*sum2/n)
    denominator = sqrt((sum1Sq-pow(sum1,2)/n)*(sum2Sq-pow(sum2,2)/n))
    
    if denominator == 0:
        return(0)
    
    r = numerator/denominator
    return(r)
    

In [176]:
print(sim_pearson(critics,'Toby','Jack Matthews'))

-0.18898223650462054


In [177]:
# Return the best matches for person from the critics dictionary
# Number of results and similarity function are optional parameters
def topMatches(prefs,person,n=5,similarity=sim_pearson):
    ## find the similarity scores for the person and every other individual
    ## store values in a tuple for the person and other individual
    scores = [similarity(prefs, person, other) for other in prefs if other == person]
    # sort the list so that the highest scores appear at the top
    scores.sort()
    scores.reverse()
    return(scores[0:n])

In [178]:
print(topMatches(critics,'Toby'))

[1.0]


In [None]:
## Get recommendations for a person by using a weighted average
## of every other user's rankings
def getRecommendations(prefs,person,similarity=sim_pearson):
    totals = {}
    simSums = {}
    for other in prefs:
        #don't compare me to myself
        if other == person:
            continue
        sim = similarity(prefs, person, other)  # write code for similarity here
        
        #ignore scores of zero or lower
        if sim <= 0:
            continue
        for item in prefs[other]:
            #only score movies I haven't seen yet
            if item not in prefs[person] or prefs[person][item]==0:
                #Similariy * Score
                totals.setdefault(item,0)
                total = sim * #  Write code for totals here (1 line)
                #Sum of similarities
                simSums.setdefault(item,0)
                simSums = sim + # Write code here to add similarity (1 line)
            #create a normalized list
        rankings = [(total/simSums[item],item) for item,total in totals.items()]
        # return the sorted list
        rankings.sort()
        rankings.reverse()
        return(rankings)

In [None]:
print(getRecommendations(critics,'Toby'))

In [None]:
print(getRecommendations(critics,'Jack Matthews'))