Here we are going to create a simple recommender engine to recommend movies.

Collecting preferences:

In [1]:
# Ratings given by each movie critic to a small set of movies,
# keyed first by critic name and then by movie title
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

In [2]:
critics['Lisa Rose']['Lady in the Water']

2.5

In [6]:
critics['Toby']['Snakes on a Plane']=4.5

In [7]:
critics['Toby']

{'Snakes on a Plane': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 1.0}

In [8]:
from math import sqrt
sqrt(pow(5-4,2)+pow(4-1,2))

3.1622776601683795

In [9]:
1/(1+sqrt(pow(5-4,2)+pow(4-1,2)))

0.2402530733520421

In [35]:
# Returns the Pearson correlation coefficient for p1 and p2
def sim_pearson(prefs,p1,p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    prefs maps each person to a dict of {item: rating}. Only the items
    rated by both p1 and p2 take part in the computation.

    Returns 0 when the two have no rated item in common, or when the
    shared ratings of either one have no variance (the coefficient is
    undefined in that case); otherwise returns the coefficient as a float.
    """
    # Get the list of mutually rated items, in the order of prefs[p1]
    shared=[item for item in prefs[p1] if item in prefs[p2]]
    # if there are no ratings in common, return 0
    if not shared: return 0
    # float() keeps every division below a true division even when all
    # the ratings are ints (Python 2 would otherwise truncate)
    n=float(len(shared))
    # Add up all the preferences
    sum1=sum(prefs[p1][it] for it in shared)
    sum2=sum(prefs[p2][it] for it in shared)
    # Sum up the squares
    sum1Sq=sum(pow(prefs[p1][it],2) for it in shared)
    sum2Sq=sum(pow(prefs[p2][it],2) for it in shared)
    # Sum up the products
    pSum=sum(prefs[p1][it]*prefs[p2][it] for it in shared)
    # Calculate Pearson score
    num=pSum-(sum1*sum2/n)
    var1=sum1Sq-pow(sum1,2)/n
    var2=sum2Sq-pow(sum2,2)/n
    # Rounding can push a zero variance slightly below zero; treat that
    # as zero instead of letting sqrt() raise or dividing by noise
    if var1<=0 or var2<=0: return 0
    den=sqrt(var1*var2)
    # The product of two tiny variances can still underflow to zero
    if den==0: return 0
    return num/den

In [37]:
# Call form of print: valid on both Python 2 and Python 3
print(sim_pearson(critics,'Lisa Rose','Gene Seymour'))

0.396059017191


In [38]:
# Ranks everyone else in prefs by how similar they are to person.
# n caps the number of results; similarity is the scoring function.
def topMatches(prefs,person,n=5,similarity=sim_pearson):
    """Return up to n (score, other) tuples, highest score first.

    Every key of prefs except person is scored with
    similarity(prefs, person, other).
    """
    scored=[]
    for candidate in prefs:
        if candidate!=person:
            scored.append((similarity(prefs,person,candidate),candidate))
    # Ascending sort, then flipped, so the best matches come first
    ranked=sorted(scored)[::-1]
    return ranked[:n]

In [39]:
topMatches(critics,'Toby',n=3)

[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig')]

Now let's create recommendations:

In [63]:
# Builds recommendations for a person as a similarity-weighted average
# of the ratings given by everyone else
def getRecommendations(prefs,person,similarity=sim_pearson):
    """Return (predicted rating, item) tuples, highest prediction first.

    Only items that person has not rated (or has rated 0) are scored, and
    only people whose similarity to person is above zero contribute.
    """
    weighted={}  # item -> sum of rating * similarity
    weights={}   # item -> sum of similarity
    for other in prefs:
        # skip the person we are recommending for
        if other==person:
            continue
        sim=similarity(prefs,person,other)
        # people with zero or negative similarity contribute nothing
        if sim<=0:
            continue
        for item in prefs[other]:
            alreadyRated=item in prefs[person] and prefs[person][item]!=0
            if alreadyRated:
                continue
            weighted[item]=weighted.get(item,0)+prefs[other][item]*sim
            weights[item]=weights.get(item,0)+sim

    # Normalize each total by the similarity mass behind it
    ranked=sorted((weighted[item]/weights[item],item) for item in weighted)
    # Best predictions first
    ranked.reverse()
    return ranked

In [65]:
getRecommendations(critics,'Toby')

[(3.3477895267131013, 'The Night Listener'),
 (2.8325499182641614, 'Lady in the Water'),
 (2.5309807037655645, 'Just My Luck')]

In [67]:
def transformPrefs(prefs):
    """Return prefs with its two key levels swapped.

    A {person: {item: rating}} dictionary becomes {item: {person: rating}}.
    """
    flipped={}
    for person,ratings in prefs.items():
        for item,rating in ratings.items():
            # Create the item's inner dict on first sight, then file the rating
            flipped.setdefault(item,{})[person]=rating
    return flipped

In [69]:
movies=transformPrefs(critics)
topMatches(movies,'Superman Returns')

[(0.6579516949597695, 'You, Me and Dupree'),
 (0.4879500364742689, 'Lady in the Water'),
 (0.11180339887498941, 'Snakes on a Plane'),
 (-0.1798471947990544, 'The Night Listener'),
 (-0.42289003161103106, 'Just My Luck')]

In [70]:
getRecommendations(movies,'Just My Luck')

[(4.0, 'Michael Phillips'), (3.0, 'Jack Matthews')]