In [None]:
from math import sqrt

In [None]:
## Build the `critics` dictionary from the critics.csv file.
## Keys are the critics' names; each value is a dictionary mapping
## movie name -> rating (stored as a float).
## The tokenisation below expects each line to look like
##   'Critic Name','Movie A',2.5,'Movie B',3.5,...
## (layout inferred from the parsing logic -- confirm against the actual file).
## NOTE: a quote character inside a name would break the split on "'".
critics = {}

with open('/content/critics.csv', 'r') as f:
  for line in f:
    # Strip only a trailing newline. Slicing off the last character
    # unconditionally would eat the final digit of the last rating when the
    # file does not end with a newline.
    line = line.rstrip('\n')
    tokens = line.split("'")
    # Drop empty strings and the bare "," separators left between quoted fields
    tokens = [token for token in tokens if token != "," and token != ""]
    # Skip blank lines instead of failing on tokens[0]
    if not tokens:
      continue
    # First token is the critic's name; start a fresh ratings dictionary
    name = tokens[0]
    critics[name] = {}
    # Remaining tokens alternate: movie name, then a ",<rating>," fragment.
    # zip() silently ignores a trailing movie name that has no rating.
    for movie, rating_token in zip(tokens[1::2], tokens[2::2]):
      # The rating sits between the commas; convert it to a float
      critics[name][movie] = float(rating_token.split(',')[1])

print(critics)

{'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0}, 'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 3.5}, 'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 4.0}, 'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 2.5}, 'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 2.0}, 'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5}, 'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0, 'Superman 

In [None]:
# Distance-based similarity score for two individuals
# params: critics dictionary, person 1's name, person 2's name
def sim_distance(prefs,person1,person2):
    """Score how close two people's ratings are on the movies both have rated.

    Args:
        prefs: dictionary mapping a person's name to a {movie: rating} dictionary.
        person1: name of the first person (must be a key of ``prefs``).
        person2: name of the second person (must be a key of ``prefs``).

    Returns:
        0 when the two people share no rated movies; otherwise
        1 / (1 + sum of squared rating differences over the shared movies),
        so identical shared ratings give 1.0.
    """
    first = prefs[person1]
    # Movies rated by both people, kept in person1's rating order
    common = [title for title in first if title in prefs[person2]]
    if not common:
        return 0
    second = prefs[person2]
    # Total squared gap between the two people's ratings on the shared movies
    squared_gap = sum((first[title] - second[title]) ** 2 for title in common)
    return 1 / (1 + squared_gap)

In [None]:
# Distance-based similarity between Toby and Jack Matthews
print(sim_distance(critics,'Toby','Jack Matthews'))

0.11764705882352941


In [None]:
# Returns the Pearson correlation coefficient for two individuals
# first parameter is the critics dictionary

def sim_pearson(prefs,person1,person2):
    """Pearson correlation of two people's ratings over the movies both rated.

    Args:
        prefs: dictionary mapping a person's name to a {movie: rating} dictionary.
        person1: name of the first person (must be a key of ``prefs``).
        person2: name of the second person (must be a key of ``prefs``).

    Returns:
        The correlation coefficient, or 0 when it is undefined: the two
        people share no rated movies, or either person's shared ratings
        show no spread (all equal).
    """
    # Mutually rated movies, in person1's rating order
    si = [movie for movie in prefs[person1] if movie in prefs[person2]]
    n = len(si)
    # Nothing in common: no correlation can be computed
    if n == 0:
        return 0

    ratings1 = [prefs[person1][movie] for movie in si]
    ratings2 = [prefs[person2][movie] for movie in si]

    # Sum of the ratings for each individual
    sum1 = sum(ratings1)
    sum2 = sum(ratings2)

    # Sum of the squared ratings for each individual
    sum1Sq = sum(rating ** 2 for rating in ratings1)
    sum2Sq = sum(rating ** 2 for rating in ratings2)

    # Sum of the pairwise products of the two individuals' ratings
    pSum = sum(a * b for a, b in zip(ratings1, ratings2))

    numerator = pSum - (sum1 * sum2 / n)
    # Each term is n times a variance, so mathematically >= 0.
    spread1 = sum1Sq - pow(sum1, 2) / n
    spread2 = sum2Sq - pow(sum2, 2) / n

    # Floating-point rounding can push a zero variance slightly negative.
    # Checking each term separately avoids sqrt() raising ValueError on a
    # negative product, and avoids a meaningless huge score when both terms
    # are tiny negatives whose product is positive.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    denominator = sqrt(spread1 * spread2)
    # Guards against the product underflowing to zero
    if denominator == 0:
        return 0

    return numerator / denominator
    

In [None]:
# Pearson correlation between Toby's and Jack Matthews' shared ratings
print(sim_pearson(critics,'Toby','Jack Matthews'))

0.66284898035987


In [None]:
# Best matches for a person from the critics dictionary
# Number of results and similarity function are optional parameters
def topMatches(prefs,person,n=5,similarity=sim_pearson):
    """Rank everyone else in ``prefs`` by how similar they are to ``person``.

    Args:
        prefs: dictionary mapping a person's name to a {movie: rating} dictionary.
        person: name of the person to find matches for.
        n: maximum number of matches to return.
        similarity: function called as similarity(prefs, person, other) that
            returns a similarity score.

    Returns:
        A list of at most ``n`` (score, name) tuples, highest score first.
    """
    # One (score, name) pair per other person; the person is never compared to themself
    pairs = (
        (similarity(prefs, person, candidate), candidate)
        for candidate in prefs
        if candidate != person
    )
    # Sort ascending, then flip so the best scores lead the list
    ascending = sorted(pairs)
    return ascending[::-1][:n]

In [None]:
# Up to five critics most similar to Toby, scored with the default sim_pearson
print(topMatches(critics,'Toby'))

[(0.9912407071619299, 'Lisa Rose'), (0.9244734516419049, 'Mick LaSalle'), (0.8934051474415647, 'Claudia Puig'), (0.66284898035987, 'Jack Matthews'), (0.38124642583151164, 'Gene Seymour')]


In [None]:
## Get recommendations for a person by using a weighted average of every other user's ratings
def getRecommendations(prefs,person,similarity=sim_pearson):
    """Recommend movies ``person`` has not rated, ranked by predicted rating.

    Every other person whose similarity to ``person`` is positive contributes
    their ratings, weighted by that similarity. The predicted rating of a
    movie is the similarity-weighted average of the ratings it received.

    Args:
        prefs: dictionary mapping a person's name to a {movie: rating} dictionary.
        person: name of the person to recommend for.
        similarity: function called as similarity(prefs, person, other) that
            returns a similarity score.

    Returns:
        A list of (predicted rating, movie) tuples, highest prediction first.
        The list is empty when nobody with a positive similarity has rated a
        movie that ``person`` has not.
    """
    totals = {}
    simSums = {}
    for other in prefs:
        # don't compare one to oneself
        if other == person:
            continue
        sim = similarity(prefs, person, other)

        # ignore scores of zero or lower
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            # only score movies the person hasn't rated yet (a rating of 0 counts as unrated)
            if item not in prefs[person] or prefs[person][item] == 0:
                # Similarity * Score
                totals[item] = totals.get(item, 0) + sim * rating
                # Sum of similarities
                simSums[item] = simSums.get(item, 0) + sim

    # Build the normalized list once, after all critics have been processed.
    # Doing this inside the loop re-sorted on every iteration and left
    # `rankings` undefined when every critic was skipped.
    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    # Highest predicted rating first
    rankings.sort(reverse=True)
    return rankings

In [None]:
# Movies Toby has not rated, ordered by similarity-weighted average rating
print(getRecommendations(critics,'Toby'))

[(3.3477895267131017, 'The Night Listener'), (2.8325499182641614, 'Lady in the Water'), (2.530980703765565, 'Just My Luck')]


In [None]:
# Movies Jack Matthews has not rated, ordered by similarity-weighted average rating
print(getRecommendations(critics,'Jack Matthews'))

[(2.150559004463025, 'Just My Luck')]
