In [1]:
#https://grouplens.org/datasets/movielens/
import pandas as pd
from tqdm import tqdm

input_folder='../data/ml-latest-small'

In [2]:
movies_df=pd.read_csv(input_folder+'/movies.csv', index_col='movieId')

movies_df

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
193585,Flint (2017),Drama
193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [3]:
tags_df=pd.read_csv(input_folder+'/tags.csv')

tags_df

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200
...,...,...,...,...
3678,606,7382,for katie,1171234019
3679,606,7936,austere,1173392334
3680,610,3265,gun fu,1493843984
3681,610,3265,heroic bloodshed,1493843978


In [4]:
ratings_df=pd.read_csv(input_folder+'/ratings.csv')

ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
def discretize_rating(rating:float):
    
    '''
    Converts a given float rating to a string value 
    
    '''
    polarity='A' # average 
        
    if rating<3: polarity='N' # negative 
    elif rating>3:polarity='P' # positive 
    
    return polarity

<h3> User-based recommendations </h3>

In [6]:
def load_user_ratings(ratings_df:pd.core.frame.DataFrame):
    
    '''
    Loads all the ratings submitted by each user
    
    Returns a dictionary that maps each user to a second dict that maps movies to discretized ratings
    
    '''
    
    distinct_users=set(ratings_df['userId']) # get all distinct users
    
    user_ratings={} # store movie ratings per user 
    
    for user in distinct_users: # for each user

        # get the movie id and rating for every rating submitted by this user
        my_ratings=ratings_df[ratings_df.userId==user][['movieId','rating']] 

        #discretize the ratings and attach them to the user
        user_ratings[user]=dict(zip(my_ratings.movieId, my_ratings.rating.apply(discretize_rating)))
    
    return user_ratings



In [7]:
user_ratings=load_user_ratings(ratings_df)

In [8]:
user_ratings[10]

{296: 'N',
 356: 'P',
 588: 'P',
 597: 'P',
 912: 'P',
 1028: 'N',
 1088: 'A',
 1247: 'A',
 1307: 'A',
 1784: 'P',
 1907: 'P',
 2571: 'N',
 2671: 'P',
 2762: 'N',
 2858: 'N',
 2959: 'N',
 3578: 'P',
 3882: 'A',
 4246: 'P',
 4306: 'P',
 4447: 'P',
 4993: 'P',
 4995: 'P',
 5066: 'A',
 5377: 'P',
 5620: 'A',
 5943: 'A',
 5952: 'P',
 5957: 'A',
 6155: 'A',
 6266: 'A',
 6377: 'P',
 6535: 'P',
 6942: 'P',
 7149: 'P',
 7151: 'A',
 7153: 'P',
 7154: 'P',
 7169: 'P',
 7293: 'P',
 7375: 'P',
 7451: 'P',
 7458: 'P',
 8529: 'P',
 8533: 'P',
 8636: 'P',
 8665: 'P',
 8808: 'A',
 8869: 'P',
 8961: 'N',
 8969: 'P',
 8970: 'N',
 30749: 'P',
 31433: 'N',
 31685: 'P',
 33145: 'A',
 33679: 'A',
 33794: 'P',
 40629: 'A',
 40819: 'P',
 41285: 'N',
 47099: 'A',
 49272: 'P',
 49286: 'P',
 51662: 'A',
 51705: 'P',
 51834: 'N',
 54286: 'P',
 56367: 'P',
 56949: 'A',
 58047: 'P',
 58559: 'P',
 59333: 'N',
 59421: 'N',
 60397: 'A',
 60950: 'N',
 61250: 'N',
 63113: 'P',
 63992: 'P',
 64969: 'N',
 66203: 'P',
 689

In [9]:
from itertools import combinations
from collections import defaultdict

def get_user_neighbors(user_ratings:dict, # ratings submitted by each user
                       min_rating_num:int=5 # at least this many ratings are required for a comparison
                      ):
    
    '''
    Compute rating-based similarity between every two pairs of users 
    
    '''
    
    #get all possible pairs of usres
    pairs=list(combinations(list(user_ratings.keys()),2))
    
    usim=defaultdict(dict) # initialize the sim dictionary
    
    for u1,u2 in pairs: # for every user pair 
   
        #get a set with all the discretized ratings (movie id, polarity tuples) for u1 and u2
        s1=set([(mid,pol) for mid,pol in user_ratings[u1].items()])
        s2=set([(mid,pol) for mid,pol in user_ratings[u2].items()])

        # check if both users respect the lower bound
        if len(s1)<min_rating_num or len(s2)<min_rating_num: continue
      
        # get the union and intersection for these two users
        union=s1.union(s2)
        inter=s1.intersection(s2)
    
        # compute user sim via the jaccard coeff
        jacc=len(inter)/len(union)

        # remember the sim values
        if jacc > 0:
            usim[u1][u2]=jacc
            usim[u2][u1]=jacc
        
    # attach each user to its neighbors, sorted by sim in descending order 
    return {user:sorted(usim[user].items(),key=lambda x:x[1], reverse=True) for user in usim}
    
    
    

In [10]:
neighbors_u=get_user_neighbors(user_ratings)

In [11]:
neighbors_u[10]

[(491, 0.07936507936507936),
 (331, 0.07317073170731707),
 (460, 0.07246376811594203),
 (159, 0.07239819004524888),
 (601, 0.07111111111111111),
 (119, 0.06927710843373494),
 (466, 0.06751054852320675),
 (189, 0.06666666666666667),
 (586, 0.06422018348623854),
 (563, 0.06325301204819277),
 (247, 0.06227106227106227),
 (319, 0.06060606060606061),
 (393, 0.06048387096774194),
 (143, 0.06030150753768844),
 (581, 0.058823529411764705),
 (200, 0.05803571428571429),
 (475, 0.05734767025089606),
 (417, 0.05612244897959184),
 (504, 0.05581395348837209),
 (246, 0.05521472392638037),
 (52, 0.0546875),
 (233, 0.05454545454545454),
 (166, 0.054313099041533544),
 (21, 0.054249547920433995),
 (227, 0.05405405405405406),
 (523, 0.05392156862745098),
 (104, 0.05357142857142857),
 (280, 0.05329153605015674),
 (509, 0.05199306759098787),
 (382, 0.05121951219512195),
 (141, 0.051194539249146756),
 (254, 0.051181102362204724),
 (203, 0.05113636363636364),
 (562, 0.05099150141643059),
 (211, 0.050458715596

In [12]:

def recommend_ub(user:int, 
                 movies_df:pd.core.frame.DataFrame, # movie info  
                 neighbors_u:dict, # neighbors dict
                 user_ratings:dict, # ratings submitted per user 
                 neighbor_num:int, # number of neighbors to consider
                 rec_num:int# number of movies to recommend
                ):
  
    '''
    Delivers user-based recommendations. Given a specific user:
    - find the user's neighbor_num most similar users
    - Go over all the movies rated by all neighbors
    - Each movie gets +2 if a neighbor liked it, -2 if a neighbor didn't like it, -1 if  neighbor was neutral
    - +2,-1,and -2 are scaled based on user sim
    - Sort the movies by their scores in desc order
    - Go over the sorted movie list. If the user has already rated the movie, store its rating. Otherwise print.
    
    '''
    
    top_k=neighbors_u[user][:neighbor_num] # get the top k neighbors of this user
    
    votes=defaultdict(int) # count the votes per movie
    
    for neighbor,sim_val in top_k: # for each neighbor 

        for mid,pol in user_ratings[neighbor].items(): # for each movie rated by this neighbor

            if pol=='P': # positive neighbor rating
                votes[mid]+=2*sim_val
            elif pol=='N': # negative 
                votes[mid]-=2*sim_val
            else: # average 
                votes[mid]-=1*sim_val

    # sort the movies in desc order 
    srt=sorted(votes.items(),key=lambda x:x[1], reverse=True)

    print('\nI suggest the following movies because they have\
    received positive ratings from users who tend to\nlike what you like:\n')
          
    cnt=0 # count number of recommendations made 
    
    already_rated={}
    
    for mov, score in srt: # for each movie 
    
        title=movies_df.loc[mov]['title'] # get the title 
        
        rat=user_ratings[user].get(mov,None) # check if the user has already rated the movie 
        
        if rat: # movie already rated 
            already_rated[title]=rat # store the rating
            continue
     
        cnt+=1 # one more recommendation
        print('\n',mov, title) # print 
    
        if cnt==rec_num:break # stop once you 've made enough recommendations
    
    print('\n',already_rated)


In [13]:
recommend_ub(100, movies_df, neighbors_u, user_ratings, 10, 10)
    


I suggest the following movies because they have    received positive ratings from users who tend to
like what you like:


 2762 Sixth Sense, The (1999)

 260 Star Wars: Episode IV - A New Hope (1977)

 1210 Star Wars: Episode VI - Return of the Jedi (1983)

 150 Apollo 13 (1995)

 1 Toy Story (1995)

 1097 E.T. the Extra-Terrestrial (1982)

 318 Shawshank Redemption, The (1994)

 2858 American Beauty (1999)

 2918 Ferris Bueller's Day Off (1986)

 589 Terminator 2: Judgment Day (1991)

 {'Pretty Woman (1990)': 'P', 'Forrest Gump (1994)': 'P', 'Back to the Future (1985)': 'P', "There's Something About Mary (1998)": 'P', 'Saving Private Ryan (1998)': 'P', 'Men in Black (a.k.a. MIB) (1997)': 'P', 'Office Space (1999)': 'P', 'Breakfast Club, The (1985)': 'P', 'When Harry Met Sally... (1989)': 'P', 'Princess Bride, The (1987)': 'P', 'Pulp Fiction (1994)': 'P', 'Groundhog Day (1993)': 'P', 'Rain Man (1988)': 'P'}


<h3> Movie-based recommendations </h3>

In [29]:
def load_ratings(ratings_df:pd.core.frame.DataFrame,
                focus:list # used to pick movie-based or user-based 
                ):
    
    '''
    Loads all the ratings submitted by each user or all ratings submitted for a movie
    
    Returns:
        a dictionary that maps each user to a second dict that maps movies to discretized ratings
    
        OR 
        
        a dictionary that maps each movie to a second dict that maps raters to discretized ratings
    
    '''
    
    distinct_ids=set(ratings_df[focus[0]]) # get all distinct users
    
    ratings={} # store ratings per entity 
    
    for id_ in tqdm(distinct_ids): # for each user

        # get the infro for every rating submitted for this user or for this movie
        my_ratings=ratings_df[ratings_df[focus[0]]==id_][[focus[1],'rating']] 

        #discretize the ratings and attach them to the user or to the movie
        ratings[id_]=dict(zip(my_ratings[focus[1]], my_ratings.rating.apply(discretize_rating)))
    
    return ratings



In [30]:
user_ratings_v2=load_ratings(ratings_df,['userId','movieId'])

user_ratings_v2[10]


00%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 610/610 [00:00<00:00, 3086.51it/s]

{296: 'N',
 356: 'P',
 588: 'P',
 597: 'P',
 912: 'P',
 1028: 'N',
 1088: 'A',
 1247: 'A',
 1307: 'A',
 1784: 'P',
 1907: 'P',
 2571: 'N',
 2671: 'P',
 2762: 'N',
 2858: 'N',
 2959: 'N',
 3578: 'P',
 3882: 'A',
 4246: 'P',
 4306: 'P',
 4447: 'P',
 4993: 'P',
 4995: 'P',
 5066: 'A',
 5377: 'P',
 5620: 'A',
 5943: 'A',
 5952: 'P',
 5957: 'A',
 6155: 'A',
 6266: 'A',
 6377: 'P',
 6535: 'P',
 6942: 'P',
 7149: 'P',
 7151: 'A',
 7153: 'P',
 7154: 'P',
 7169: 'P',
 7293: 'P',
 7375: 'P',
 7451: 'P',
 7458: 'P',
 8529: 'P',
 8533: 'P',
 8636: 'P',
 8665: 'P',
 8808: 'A',
 8869: 'P',
 8961: 'N',
 8969: 'P',
 8970: 'N',
 30749: 'P',
 31433: 'N',
 31685: 'P',
 33145: 'A',
 33679: 'A',
 33794: 'P',
 40629: 'A',
 40819: 'P',
 41285: 'N',
 47099: 'A',
 49272: 'P',
 49286: 'P',
 51662: 'A',
 51705: 'P',
 51834: 'N',
 54286: 'P',
 56367: 'P',
 56949: 'A',
 58047: 'P',
 58559: 'P',
 59333: 'N',
 59421: 'N',
 60397: 'A',
 60950: 'N',
 61250: 'N',
 63113: 'P',
 63992: 'P',
 64969: 'N',
 66203: 'P',
 689

In [31]:
from itertools import combinations
from collections import defaultdict

def get_neighbors(ratings:dict, # ratings submitted by each user or by each movie
                       min_rating_num:int=5 # at least this many ratings are required for a comparison
                      ):
    
    '''
    Compute rating-based similarity between every two pairs of users or pairs of movies
    
    '''
    
    #get all possible pairs 
    pairs=list(combinations(list(ratings.keys()),2))
    
    sim=defaultdict(dict) # initialize the sim dictionary
    
    for id1,id2 in tqdm(pairs): # for every entity pair 
   
        #get a set with all the discretized ratings (movie/user id, polarity tuples) for x1 and x2
        s1=set([(xid,pol) for xid,pol in ratings[id1].items()])
        s2=set([(xid,pol) for xid,pol in ratings[id2].items()])

        # check if both users/movies respect the lower bound
        if len(s1)<min_rating_num or len(s2)<min_rating_num: continue
      
        # get the union and intersection for these two users/movies
        union=s1.union(s2)
        inter=s1.intersection(s2)
    
        # compute user/movie sim via the jaccard coeff
        jacc=len(inter)/len(union)

        # remember the sim values
        sim[id1][id2]=jacc
        sim[id2][id1]=jacc
        
    # attach each user/movie to its neighbors, sorted by sim in descending order 
    return {id_:sorted(sim[id_].items(),key=lambda x:x[1], reverse=True) for id_ in sim}
    
    
    

In [32]:
neighbors_u_v2=get_neighbors(user_ratings_v2)


00%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 185745/185745 [00:05<00:00, 36179.36it/s]

In [33]:
neighbors_u_v2[100]

[(357, 0.12025316455696203),
 (169, 0.11497326203208556),
 (330, 0.11267605633802817),
 (201, 0.11206896551724138),
 (200, 0.0979498861047836),
 (282, 0.09686609686609686),
 (354, 0.0935672514619883),
 (594, 0.08882521489971347),
 (597, 0.08839779005524862),
 (42, 0.08687615526802218),
 (84, 0.08620689655172414),
 (263, 0.08484848484848485),
 (239, 0.08375634517766498),
 (304, 0.08333333333333333),
 (45, 0.08316831683168317),
 (144, 0.08235294117647059),
 (66, 0.08114035087719298),
 (224, 0.08021390374331551),
 (570, 0.07961783439490445),
 (587, 0.07931034482758621),
 (301, 0.07818930041152264),
 (356, 0.07635467980295567),
 (370, 0.07111111111111111),
 (367, 0.0707395498392283),
 (562, 0.07062146892655367),
 (33, 0.07042253521126761),
 (453, 0.06993006993006994),
 (166, 0.06962025316455696),
 (220, 0.06927710843373494),
 (376, 0.06844106463878327),
 (198, 0.06779661016949153),
 (64, 0.06741573033707865),
 (572, 0.06690140845070422),
 (381, 0.06689536878216124),
 (202, 0.06576402321083

In [34]:
movie_ratings=load_ratings(ratings_df,['movieId','userId'])


00%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9724/9724 [00:02<00:00, 4315.73it/s]

In [35]:
movie_ratings[1]

{1: 'P',
 5: 'P',
 7: 'P',
 15: 'N',
 17: 'P',
 18: 'P',
 19: 'P',
 21: 'P',
 27: 'A',
 31: 'P',
 32: 'A',
 33: 'A',
 40: 'P',
 43: 'P',
 44: 'A',
 45: 'P',
 46: 'P',
 50: 'A',
 54: 'A',
 57: 'P',
 63: 'P',
 64: 'P',
 66: 'P',
 68: 'N',
 71: 'P',
 73: 'P',
 76: 'N',
 78: 'P',
 82: 'N',
 86: 'P',
 89: 'A',
 90: 'A',
 91: 'P',
 93: 'A',
 96: 'P',
 98: 'P',
 103: 'P',
 107: 'P',
 112: 'A',
 119: 'P',
 121: 'P',
 124: 'P',
 130: 'A',
 132: 'N',
 134: 'A',
 135: 'P',
 137: 'P',
 140: 'A',
 141: 'P',
 144: 'P',
 145: 'P',
 151: 'P',
 153: 'N',
 155: 'A',
 156: 'P',
 159: 'P',
 160: 'P',
 161: 'P',
 166: 'P',
 167: 'P',
 169: 'P',
 171: 'P',
 177: 'P',
 178: 'P',
 179: 'P',
 182: 'P',
 185: 'P',
 186: 'P',
 191: 'P',
 193: 'N',
 200: 'P',
 201: 'P',
 202: 'P',
 206: 'P',
 213: 'P',
 214: 'A',
 216: 'A',
 217: 'P',
 219: 'P',
 220: 'P',
 223: 'P',
 226: 'P',
 229: 'P',
 232: 'P',
 233: 'A',
 234: 'P',
 239: 'P',
 240: 'P',
 247: 'P',
 249: 'P',
 252: 'P',
 254: 'P',
 263: 'P',
 264: 'P',
 266:

In [36]:
neighbors_m=get_neighbors(movie_ratings)


00%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47273226/47273226 [01:21<00:00, 581454.27it/s]

In [22]:
print(movies_df.loc[1]['title'])

my_neighbors=neighbors_m[1]

for n,sim_val in my_neighbors:
    
    print(movies_df.loc[n]['title'],sim_val)




Toy Story (1995)
Toy Story 2 (1999) 0.2631578947368421
Finding Nemo (2003) 0.2624113475177305
Shrek (2001) 0.26229508196721313
Star Wars: Episode IV - A New Hope (1977) 0.2594594594594595
Lion King, The (1994) 0.2564935064935065
Monsters, Inc. (2001) 0.2527075812274368
Aladdin (1992) 0.2476489028213166
Back to the Future (1985) 0.24516129032258063
Forrest Gump (1994) 0.2420091324200913
Star Wars: Episode V - The Empire Strikes Back (1980) 0.24198250728862974
Jurassic Park (1993) 0.2410958904109589
Incredibles, The (2004) 0.24087591240875914
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981) 0.23880597014925373
Indiana Jones and the Last Crusade (1989) 0.2326388888888889
Star Wars: Episode VI - Return of the Jedi (1983) 0.23053892215568864
Monty Python and the Holy Grail (1975) 0.22727272727272727
Shawshank Redemption, The (1994) 0.22580645161290322
Sixth Sense, The (1999) 0.2236024844720497
Princess Bride, The (1987) 0.2226027397260274
Groundhog Day (1993) 0

Interstellar (2014) 0.08679245283018867
Princess Mononoke (Mononoke-hime) (1997) 0.08677685950413223
Rounders (1998) 0.08658008658008658
From Russia with Love (1963) 0.08658008658008658
40-Year-Old Virgin, The (2005) 0.08646616541353383
Sleepy Hollow (1999) 0.08627450980392157
Some Like It Hot (1959) 0.0860655737704918
Karate Kid, The (1984) 0.08583690987124463
Leaving Las Vegas (1995) 0.08582089552238806
Blair Witch Project, The (1999) 0.08560311284046693
Snow White and the Seven Dwarfs (1937) 0.08550185873605948
Trading Places (1983) 0.08547008547008547
Elf (2003) 0.08547008547008547
Chasing Amy (1997) 0.08536585365853659
Raging Bull (1980) 0.0851063829787234
Exorcist, The (1973) 0.08502024291497975
Zombieland (2009) 0.08502024291497975
Starship Troopers (1997) 0.08455882352941177
Superbad (2007) 0.08433734939759036
Dumb & Dumber (Dumb and Dumber) (1994) 0.08411214953271028
Wedding Singer, The (1998) 0.084
101 Dalmatians (One Hundred and One Dalmatians) (1961) 0.08368200836820083
In 

Gran Torino (2008) 0.05668016194331984
Broken Arrow (1996) 0.05653710247349823
Addams Family Values (1993) 0.05653710247349823
John Wick (2014) 0.05627705627705628
Big Daddy (1999) 0.05622489959839357
Thomas Crown Affair, The (1999) 0.05622489959839357
Lost World: Jurassic Park, The (1997) 0.056179775280898875
Sabrina (1954) 0.05603448275862069
Brave (2012) 0.05603448275862069
Silver Linings Playbook (2012) 0.05603448275862069
City Slickers (1991) 0.055793991416309016
Babel (2006) 0.055793991416309016
Tommy Boy (1995) 0.055776892430278883
Rosemary's Baby (1968) 0.05555555555555555
Finding Neverland (2004) 0.05555555555555555
Punch-Drunk Love (2002) 0.05531914893617021
Rudy (1993) 0.05508474576271186
Running Man, The (1987) 0.05508474576271186
Jay and Silent Bob Strike Back (2001) 0.05508474576271186
Brokeback Mountain (2005) 0.05508474576271186
My Fair Lady (1964) 0.05485232067510549
Postman, The (Postino, Il) (1994) 0.05439330543933055
Fantastic Mr. Fox (2009) 0.05429864253393665
City

Outlaw Josey Wales, The (1976) 0.04017857142857143
Spy Who Loved Me, The (1977) 0.04017857142857143
Hellboy II: The Golden Army (2008) 0.04017857142857143
Hugo (2011) 0.04017857142857143
Escape from L.A. (1996) 0.040160642570281124
Muppet Christmas Carol, The (1992) 0.04
Payback (1999) 0.04
Waking Life (2001) 0.04
Spotlight (2015) 0.04
22 Jump Street (2014) 0.04
Robots (2005) 0.04
Me, Myself & Irene (2000) 0.0398406374501992
Streetcar Named Desire, A (1951) 0.03982300884955752
Short Circuit (1986) 0.03982300884955752
Megamind (2010) 0.03982300884955752
Repo Man (1984) 0.039647577092511016
Charlotte's Web (1973) 0.039647577092511016
For Your Eyes Only (1981) 0.039647577092511016
And Your Mother Too (Y tu mamá también) (2001) 0.039647577092511016
Snowpiercer (2013) 0.039647577092511016
Terms of Endearment (1983) 0.039473684210526314
Sleeping Beauty (1959) 0.039473684210526314
Simple Plan, A (1998) 0.039473684210526314
River Runs Through It, A (1992) 0.039473684210526314
Evil Dead, The (1

Chronicles of Riddick, The (2004) 0.030303030303030304
Click (2006) 0.030303030303030304
Crocodile Dundee II (1988) 0.03017241379310345
Scrooged (1988) 0.03017241379310345
Mad Max Beyond Thunderdome (1985) 0.03017241379310345
xXx (2002) 0.03017241379310345
Freaky Friday (2003) 0.03017241379310345
Holiday, The (2006) 0.03017241379310345
Teenage Mutant Ninja Turtles (1990) 0.030042918454935622
28 Days (2000) 0.030042918454935622
Her (2013) 0.030042918454935622
Evolution (2001) 0.029787234042553193
Robin Hood: Prince of Thieves (1991) 0.029661016949152543
Step Brothers (2008) 0.029661016949152543
Courage Under Fire (1996) 0.029535864978902954
Dracula (Bram Stoker's Dracula) (1992) 0.029535864978902954
She's All That (1999) 0.029535864978902954
Scream 3 (2000) 0.029535864978902954
Down Periscope (1996) 0.029288702928870293
Scream 2 (1997) 0.029288702928870293
Wild Things (1998) 0.029288702928870293
Home Alone 2: Lost in New York (1992) 0.029288702928870293
Quick and the Dead, The (1995) 0.

Believer, The (2001) 0.018433179723502304
Peter Pan (2003) 0.018433179723502304
Murder on the Orient Express (1974) 0.018433179723502304
Prairie Home Companion, A (2006) 0.018433179723502304
Art School Confidential (2006) 0.018433179723502304
Family Guy Presents: It's a Trap (2010) 0.018433179723502304
August Rush (2007) 0.018433179723502304
Colombiana (2011) 0.018433179723502304
Real Steel (2011) 0.018433179723502304
Master, The (2012) 0.018433179723502304
Boy in the Striped Pajamas, The (Boy in the Striped Pyjamas, The) (2008) 0.018433179723502304
The Magnificent Seven (2016) 0.018433179723502304
Kicking and Screaming (1995) 0.01834862385321101
Don't Be a Menace to South Central While Drinking Your Juice in the Hood (1996) 0.01834862385321101
Romper Stomper (1992) 0.01834862385321101
Lords of Dogtown (2005) 0.01834862385321101
Top Hat (1935) 0.01834862385321101
Four Brothers (2005) 0.01834862385321101
My Bodyguard (1980) 0.01834862385321101
Practical Magic (1998) 0.01834862385321101


Preacher's Wife, The (1996) 0.013824884792626729
Prefontaine (1997) 0.013824884792626729
Ulee's Gold (1997) 0.013824884792626729
Mrs. Brown (a.k.a. Her Majesty, Mrs. Brown) (1997) 0.013824884792626729
Hansel & Gretel: Witch Hunters (2013) 0.013824884792626729
Tender Mercies (1983) 0.013824884792626729
Journey of Natty Gann, The (1985) 0.013824884792626729
Them! (1954) 0.013824884792626729
Wolf Man, The (1941) 0.013824884792626729
Jules and Jim (Jules et Jim) (1961) 0.013824884792626729
Hang 'Em High (1968) 0.013824884792626729
Palm Beach Story, The (1942) 0.013824884792626729
Peter's Friends (1992) 0.013824884792626729
Place Beyond the Pines, The (2012) 0.013824884792626729
Agnes of God (1985) 0.013824884792626729
Beyond the Mat (1999) 0.013824884792626729
Auntie Mame (1958) 0.013824884792626729
Anchors Aweigh (1945) 0.013824884792626729
Coming Home (1978) 0.013824884792626729
Pain & Gain (2013) 0.013824884792626729
Shane (1953) 0.013824884792626729
Before Night Falls (2000) 0.01382488

Last Detail, The (1973) 0.013636363636363636
Boys from Brazil, The (1978) 0.013636363636363636
Return to Me (2000) 0.013636363636363636
Big Kahuna, The (2000) 0.013636363636363636
Tigerland (2000) 0.013636363636363636
Ali (2001) 0.013636363636363636
Dragon: The Bruce Lee Story (1993) 0.013636363636363636
Food, Inc. (2008) 0.013636363636363636
Treasure Planet (2002) 0.013636363636363636
Life of David Gale, The (2003) 0.013636363636363636
What a Girl Wants (2003) 0.013636363636363636
Andromeda Strain, The (1971) 0.013636363636363636
Calendar Girls (2003) 0.013636363636363636
Win a Date with Tad Hamilton! (2004) 0.013636363636363636
Jersey Girl (2004) 0.013636363636363636
Bad Education (La mala educación) (2004) 0.013636363636363636
Room (2015) 0.013636363636363636
Volver (2006) 0.013636363636363636
Hail, Caesar! (2016) 0.013636363636363636
It's Kind of a Funny Story (2010) 0.013636363636363636
Breach (2007) 0.013636363636363636
Kung Fu Panda 3 (2016) 0.013636363636363636
Heartbreak Kid, 

Airplane II: The Sequel (1982) 0.012987012987012988
Stuart Little (1999) 0.012987012987012988
Star Trek: Nemesis (2002) 0.012987012987012988
Brothers Grimm, The (2005) 0.01293103448275862
6th Day, The (2000) 0.01293103448275862
Wyatt Earp (1994) 0.012875536480686695
RoboCop 3 (1993) 0.012875536480686695
Super Mario Bros. (1993) 0.012875536480686695
Weekend at Bernie's (1989) 0.012875536480686695
Mission to Mars (2000) 0.012875536480686695
Aeon Flux (2005) 0.012875536480686695
13 Going on 30 (2004) 0.012875536480686695
Drop Zone (1994) 0.01282051282051282
The Scorpion King (2002) 0.01282051282051282
Twilight (2008) 0.01282051282051282
Sphere (1998) 0.01276595744680851
Little Nicky (2000) 0.012711864406779662
Pushing Tin (1999) 0.012658227848101266
Buffy the Vampire Slayer (1992) 0.012605042016806723
Island of Dr. Moreau, The (1996) 0.012552301255230125
Lost in Space (1998) 0.012448132780082987
Deep Blue Sea (1999) 0.012448132780082987
Star Trek V: The Final Frontier (1989) 0.01239669421

Just Like Heaven (2005) 0.00904977375565611
Brewster's Millions (1985) 0.00904977375565611
House of Games (1987) 0.00904977375565611
Mannequin (1987) 0.00904977375565611
Masters of the Universe (1987) 0.00904977375565611
Catch-22 (1970) 0.00904977375565611
Seven Year Itch, The (1955) 0.00904977375565611
Suspiria (1977) 0.00904977375565611
Gorillas in the Mist (1988) 0.00904977375565611
G.I. Joe: The Rise of Cobra (2009) 0.00904977375565611
Last Castle, The (2001) 0.00904977375565611
Breathless (À bout de souffle) (1960) 0.00904977375565611
Flash Gordon (1980) 0.00904977375565611
Windtalkers (2002) 0.00904977375565611
Eight Legged Freaks (2002) 0.00904977375565611
Analyze That (2002) 0.00904977375565611
Dennis the Menace (1993) 0.00904977375565611
Core, The (2003) 0.00904977375565611
Proposition, The (2005) 0.00904977375565611
Videodrome (1983) 0.00904977375565611
Don Jon (2013) 0.00904977375565611
Show Me Love (Fucking Åmål) (1998) 0.00904977375565611
Aguirre: The Wrath of God (Aguirre

Bride of Chucky (Child's Play 4) (1998) 0.004545454545454545
Mighty Joe Young (1949) 0.004545454545454545
Stepmom (1998) 0.004545454545454545
Other Sister, The (1999) 0.004545454545454545
Rage: Carrie 2, The (1999) 0.004545454545454545
Cat's Eye (1985) 0.004545454545454545
American Movie (1999) 0.004545454545454545
Omega Man, The (1971) 0.004545454545454545
Meatballs Part II (1984) 0.004545454545454545
Poison Ivy (1992) 0.004545454545454545
Mansfield Park (1999) 0.004545454545454545
Man Bites Dog (C'est arrivé près de chez vous) (1992) 0.004545454545454545
She-Devil (1989) 0.004545454545454545
She's Gotta Have It (1986) 0.004545454545454545
Taking of Pelham 1 2 3, The (2009) 0.004545454545454545
Pokémon the Movie 2000 (2000) 0.004545454545454545
Fantastic Voyage (1966) 0.004545454545454545
Ladies Man, The (2000) 0.004545454545454545
Eagle Has Landed, The (1976) 0.004545454545454545
Eddie Murphy Raw (1987) 0.004545454545454545
Mermaids (1990) 0.004545454545454545
Tomcats (2001) 0.004545

NeverEnding Story III, The (1994) 0.0
Lord of Illusions (1995) 0.0
Safe (1995) 0.0
Exit to Eden (1994) 0.0
Heavyweights (Heavy Weights) (1995) 0.0
Love Affair (1994) 0.0
Man of the House (1995) 0.0
Mixed Nuts (1994) 0.0
Only You (1994) 0.0
Ready to Wear (Pret-A-Porter) (1994) 0.0
Higher Learning (1995) 0.0
I Love Trouble (1994) 0.0
Speechless (1994) 0.0
House of Wax (2005) 0.0
Highlander III: The Sorcerer (a.k.a. Highlander: The Final Dimension) (1994) 0.0
Air Up There, The (1994) 0.0
Bad Girls (1994) 0.0
Blink (1994) 0.0
Boxing Helena (1993) 0.0
Clean Slate (1994) 0.0
Cops and Robbersons (1994) 0.0
Even Cowgirls Get the Blues (1993) 0.0
For Love or Money (1993) 0.0
Getaway, The (1994) 0.0
Made in America (1993) 0.0
North (1994) 0.0
Terminal Velocity (1994) 0.0
Threesome (1994) 0.0
Hellraiser: Bloodline (1996) 0.0
Last Supper, The (1995) 0.0
Feeling Minnesota (1996) 0.0
Celtic Pride (1996) 0.0
Great White Hype, The (1996) 0.0
First Kid (1996) 0.0
Glimmer Man, The (1996) 0.0
Parent Trap

In [23]:

def recommend_mb(user, movies, neighbors_m, neighbor_num, rec_num):
  
    '''
    Movie-based recommendations
    
    For each user, get the list of all the movies they have rated positively .
    For each of these movies, find its neighbor_num most similar movies
    
    Every candidate movie gets a +1 if it appears in the top-k neighbors of a movie 
    that the user has rated positively. 
    
    The +1 vote is scaled based on the sim of the candidate to the movie that the user liked.
    
    '''

    # get all the movies rated by this user
    my_ratings=ratings_df[ratings_df.userId==user][['movieId','rating']]

    #convert them to a dict
    my_ratings=dict(zip(my_ratings.movieId, my_ratings.rating.apply(discretize_rating)))

    # votes for each movie 
    votes=defaultdict(int)
    
    for mid,pol in my_ratings.items(): # for each movie rated by this user 
        
        if pol!='P': continue  # only consider positively rated omvies 
            
        if mid in neighbors_m: 
            mid_neighbors=neighbors_m[mid][:neighbor_num]# get the top-k neighbors of this movie  
    
        for neighbor,sim_val in mid_neighbors: # for each neighbor
            votes[neighbor]+=sim_val # add a scaled vote
    
    # sort candidates by their scaled votes
    srt=sorted(votes.items(),key=lambda x:x[1], reverse=True)
    
    print('\nI suggest the following movies because they are similar to the movies you already like:\n')
     
    cnt=0
    
    already_rated={}
    
    for mov, score in srt:
    
        title=movies.loc[mov]['title']
        
        rat=my_ratings.get(mov,None)
        
        if rat:
            already_rated[title]=rat
            continue
    
        cnt+=1
        
        print('\n',mov, title, score)
    
        if cnt==rec_num:break
    
    print('\n',already_rated)


In [24]:
recommend_mb(100, movies_df, neighbors_m, 10, 10)


I suggest the following movies because they are similar to the movies you already like:


 2918 Ferris Bueller's Day Off (1986) 3.450581012214743

 1136 Monty Python and the Holy Grail (1975) 2.8000175706903683

 1259 Stand by Me (1986) 1.8970044611904782

 4963 Ocean's Eleven (2001) 1.772318374610611

 1210 Star Wars: Episode VI - Return of the Jedi (1983) 1.7597199608782386

 457 Fugitive, The (1993) 1.7184235469222315

 1196 Star Wars: Episode V - The Empire Strikes Back (1980) 1.7038937000050227

 2959 Fight Club (1999) 1.6618394457518646

 1089 Reservoir Dogs (1992) 1.6551670264976397

 1291 Indiana Jones and the Last Crusade (1989) 1.588542177133935

 {'Breakfast Club, The (1985)': 'P', 'Princess Bride, The (1987)': 'P', 'When Harry Met Sally... (1989)': 'P', "Singin' in the Rain (1952)": 'P', 'Office Space (1999)': 'P', 'Goodfellas (1990)': 'P', 'Mary Poppins (1964)': 'P', 'Groundhog Day (1993)': 'P', 'Beautiful Mind, A (2001)': 'P'}
