## Import packages

In [175]:
import pandas as pd
from CFModel import CFModel, DeepModel

## Define constants


In [176]:
# Tab-separated MovieLens 1M tables read by the loading cells below.
RATINGS_CSV_FILE = 'ml1m_ratings.csv'
USERS_CSV_FILE = 'ml1m_users.csv'
MOVIES_CSV_FILE = 'ml1m_movies.csv'
# File passed to trained_model.load_weights().
MODEL_WEIGHTS_FILE = 'ml1m_weights.h5'
# Third constructor argument of DeepModel; presumably the latent-factor
# (embedding) size -- TODO confirm against the CFModel module.
K_FACTORS = 200
# Initial user id. Later cells reassign TEST_USER (555, then 2222) before it is read.
TEST_USER = 123

## Load MovieLens 1M data

In [177]:
# Read only the three columns used below; the file is tab-separated, latin-1 encoded.
ratings = pd.read_csv(
    RATINGS_CSV_FILE,
    sep='\t',
    encoding='latin-1',
    usecols=['userid', 'movieid', 'rating'])
# .max() is unaffected by repeated values, so no drop_duplicates() pass is needed.
# These two maxima are later passed to the DeepModel constructor.
max_userid = ratings['userid'].max()
max_movieid = ratings['movieid'].max()
# %-formatting prints the same text under Python 2 and Python 3.
print('%s ratings loaded.' % len(ratings))

1000209 ratings loaded.


In [178]:
# Read the user descriptions (tab-separated, latin-1 encoded).
users = pd.read_csv(
    USERS_CSV_FILE,
    sep='\t',
    encoding='latin-1',
    usecols=['userid', 'gender', 'zipcode', 'age_desc', 'occ_desc'])
# Compare the number of description rows with the largest user id seen in ratings.
# %-formatting prints the same text under Python 2 and Python 3.
print('%s descriptions of %s users loaded.' % (len(users), max_userid))

6040 descriptions of 6040 users loaded.


In [179]:
# Read the movie catalogue (tab-separated, latin-1 encoded).
movies = pd.read_csv(
    MOVIES_CSV_FILE,
    sep='\t',
    encoding='latin-1',
    usecols=['movieid', 'title', 'genre'])
# Compare the number of catalogue rows with the largest movie id seen in ratings.
# %-formatting prints the same text under Python 2 and Python 3.
print('%s descriptions of %s movies loaded.' % (len(movies), max_movieid))

3883 descriptions of 3952 movies loaded.


## Make recommendations for a given user

In [180]:
# Instantiate DeepModel (imported from the CFModel module) with the largest
# user id, the largest movie id and K_FACTORS. Its weights are loaded separately.
trained_model = DeepModel(max_userid, max_movieid, K_FACTORS)

In [181]:
# Load the saved weights from MODEL_WEIGHTS_FILE into the model.
trained_model.load_weights(MODEL_WEIGHTS_FILE)

In [206]:
# Pick the user to inspect and display that user's row of the users table.
TEST_USER = 555
users.loc[users['userid'] == TEST_USER]

Unnamed: 0,userid,gender,zipcode,age_desc,occ_desc
554,555,M,53213,18-24,college/grad student


In [207]:
def predict_rating(userid, movieid):
    """Return ``trained_model.rate`` for the given user id and movie id.

    Both ids are reduced by one before the call (presumably the model
    indexes from 0 while the dataset ids start at 1 -- verify against
    the CFModel module).
    """
    user_index = userid - 1
    movie_index = movieid - 1
    return trained_model.rate(user_index, movie_index)

In [208]:
# Rows of `ratings` that belong to TEST_USER, with the model's prediction
# for each rated movie added as a new column.
user_ratings = ratings.loc[ratings['userid'] == TEST_USER,
                           ['userid', 'movieid', 'rating']]
user_ratings = user_ratings.assign(
    prediction=user_ratings.apply(
        lambda row: predict_rating(TEST_USER, row['movieid']), axis=1))

print(user_ratings.shape)
# Sort by the actual rating (highest first), attach title and genre from
# `movies`, and display the first ten rows.
(user_ratings
 .sort_values(by='rating', ascending=False)
 .merge(movies, on='movieid', how='inner', suffixes=['_u', '_m'])
 .head(10))

(38, 4)


Unnamed: 0,userid,movieid,rating,prediction,title,genre
0,555,908,5,4.609445,North by Northwest (1959),Drama|Thriller
1,555,1617,5,4.485713,L.A. Confidential (1997),Crime|Film-Noir|Mystery|Thriller
2,555,3114,5,4.451399,Toy Story 2 (1999),Animation|Children's|Comedy
3,555,1,5,4.421688,Toy Story (1995),Animation|Children's|Comedy
4,555,745,5,4.62115,"Close Shave, A (1995)",Animation|Comedy|Thriller
5,555,3751,5,4.069724,Chicken Run (2000),Animation|Children's|Comedy
6,555,3408,5,3.950705,Erin Brockovich (2000),Drama
7,555,1148,5,4.686536,"Wrong Trousers, The (1993)",Animation|Comedy
8,555,2761,5,4.211873,"Iron Giant, The (1999)",Animation|Children's
9,555,3793,4,3.80841,X-Men (2000),Action|Sci-Fi


In [209]:
# Candidate movies: every distinct movieid in `ratings` that is absent from
# user_ratings. Candidates come from `ratings`, so a movie with no ratings at
# all is never considered.
# `~mask` replaces the `mask == False` comparison, and a single .loc call
# replaces the chained [mask][cols] indexing.
recommendations = ratings.loc[
    ~ratings['movieid'].isin(user_ratings['movieid']),
    ['movieid'],
].drop_duplicates()
print(recommendations.shape)
# Score each candidate for TEST_USER with the trained model.
recommendations['prediction'] = recommendations.apply(
    lambda row: predict_rating(TEST_USER, row['movieid']), axis=1)

(3668, 1)


In [210]:
# Order candidates by predicted rating (highest first), attach title and
# genre from `movies`, and display the first ten rows.
(recommendations
 .sort_values(by='prediction', ascending=False)
 .merge(movies, on='movieid', how='inner', suffixes=['_u', '_m'])
 .head(10))

Unnamed: 0,movieid,prediction,title,genre
0,53,4.948275,Lamerica (1994),Drama
1,2503,4.900401,"Apple, The (Sib) (1998)",Drama
2,3233,4.893898,Smashing Time (1967),Comedy
3,787,4.890905,"Gate of Heavenly Peace, The (1995)",Documentary
4,2905,4.865599,Sanjuro (1962),Action|Adventure
5,557,4.853518,Mamma Roma (1962),Drama
6,858,4.845753,"Godfather, The (1972)",Action|Crime|Drama
7,527,4.804768,Schindler's List (1993),Drama|War
8,3338,4.801312,For All Mankind (1989),Documentary
9,2019,4.800545,Seven Samurai (The Magnificent Seven) (Shichin...,Action|Drama


In [211]:
# Display the catalogue row whose movieid is 1.
movies.loc[movies['movieid'] == 1]

Unnamed: 0,movieid,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy


In [216]:
# Switch to another user and score every row of `movies` for them.
TEST_USER = 2222
# NOTE: recs is built from all of `movies`, so titles this user has already
# rated receive a prediction too and can show up among the top scores.
recs = movies.assign(
    prediction=movies.apply(
        lambda row: predict_rating(TEST_USER, row['movieid']), axis=1))
# This user's own ratings, highest first, with title and genre attached.
user_ratings = ratings.loc[ratings['userid'] == TEST_USER,
                           ['userid', 'movieid', 'rating']]
(user_ratings
 .sort_values(by='rating', ascending=False)
 .merge(movies, on='movieid', how='inner', suffixes=['_u', '_m'])
 .head(15))


Unnamed: 0,userid,movieid,rating,title,genre
0,2222,2671,5,Notting Hill (1999),Comedy|Romance
1,2222,1784,5,As Good As It Gets (1997),Comedy|Drama
2,2222,3548,5,Auntie Mame (1958),Comedy|Drama
3,2222,3448,5,"Good Morning, Vietnam (1987)",Comedy|Drama|War
4,2222,1353,5,"Mirror Has Two Faces, The (1996)",Comedy|Romance
5,2222,2156,5,"Best Man, The (Il Testimone dello sposo) (1997)",Comedy|Drama
6,2222,520,5,Robin Hood: Men in Tights (1993),Comedy
7,2222,318,5,"Shawshank Redemption, The (1994)",Drama
8,2222,2302,5,My Cousin Vinny (1992),Comedy
9,2222,527,5,Schindler's List (1993),Drama|War


In [217]:
# Reorder the scored catalogue from highest to lowest prediction (rebinding
# `recs` rather than sorting in place) and display the first ten rows.
recs = recs.sort_values(by='prediction', ascending=False)
recs.head(10)

Unnamed: 0,movieid,title,genre,prediction
2693,2762,"Sixth Sense, The (1999)",Thriller,4.415385
2128,2197,Firelight (1997),Drama,4.405443
2060,2129,"Saltmen of Tibet, The (1997)",Documentary,4.404098
568,572,Foreign Student (1994),Drama,4.390558
315,318,"Shawshank Redemption, The (1994)",Drama,4.384086
3078,3147,"Green Mile, The (1999)",Drama|Thriller,4.324142
3164,3233,Smashing Time (1967),Comedy,4.32264
2502,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,4.314843
1180,1198,Raiders of the Lost Ark (1981),Action|Adventure,4.279438
594,598,Window to Paris (1994),Comedy,4.272771
