In [20]:
import numpy as np
import pandas as pd
from surprise import Reader, Dataset, SVD, evaluate

In [10]:
# Read the two input tables (movie metadata, then user ratings) from CSV files
# located in the notebook's working directory.
movie, rating = (pd.read_csv(csv_name) for csv_name in ('movie.csv', 'rating.csv'))

In [11]:
# Preview the first five rows of the movie table.
movie.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [12]:
# rating.csv contains an 'Unnamed: 0' column (presumably an index saved by an
# earlier to_csv -- confirm against the file); it carries no information here.
# Rebinding instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
rating = rating.drop(columns='Unnamed: 0', errors='ignore')
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [13]:
# Attach each rating's movie metadata (title, genres) by joining the two tables
# on their shared movieId key; only ratings with a matching movie are kept.
combined_data = rating.merge(movie, how='inner', on='movieId')
combined_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,5,2,3.0,1996-12-25 15:26:09,Jumanji (1995),Adventure|Children|Fantasy
2,13,2,3.0,1996-11-27 08:19:02,Jumanji (1995),Adventure|Children|Fantasy
3,29,2,3.0,1996-06-23 20:36:14,Jumanji (1995),Adventure|Children|Fantasy
4,34,2,3.0,1996-10-28 13:29:44,Jumanji (1995),Adventure|Children|Fantasy


### What did user 1 like in the past?

In [27]:
# All rating rows that belong to user 1.
user = combined_data.loc[combined_data['userId'] == 1]

# From those, keep the movies rated 4.5 or higher, trimmed to the columns
# needed to identify the movie and show the score.
user_like = user.loc[user['rating'] >= 4.5, ['userId', 'movieId', 'title', 'rating']]
user_like

Unnamed: 0,userId,movieId,title,rating
9407,1,1196,Star Wars: Episode V - The Empire Strikes Back...,4.5
9874,1,1198,Raiders of the Lost Ark (Indiana Jones and the...,4.5
22618,1,4993,"Lord of the Rings: The Fellowship of the Ring,...",5.0
23353,1,5952,"Lord of the Rings: The Two Towers, The (2002)",5.0
24476,1,7153,"Lord of the Rings: The Return of the King, The...",5.0
25265,1,8507,Freaks (1932),5.0
25276,1,8636,Spider-Man 2 (2004),4.5


### Let's rank the movies user 1 liked by their SVD-estimated score (top 5):

In [29]:
# Surprise draws SVD's initial factors from NumPy's global RNG; seeding it makes
# the fitted model, and therefore the scores shown below, repeatable across runs.
np.random.seed(42)

# Reader() defaults to rating_scale=(1, 5). The ratings in this data use
# half-star steps, so take the bounds from the data itself; predictions are
# clipped to this range.
ratings_for_fit = combined_data[['userId', 'movieId', 'rating']]
reader = Reader(
    rating_scale=(
        float(ratings_for_fit['rating'].min()),
        float(ratings_for_fit['rating'].max()),
    )
)
svd = SVD()

# Fit on every available rating (no hold-out split).
data = Dataset.load_from_df(ratings_for_fit, reader)
trainset = data.build_full_trainset()
svd.fit(trainset)

# Score each candidate with the fitted model. The user id is taken from the
# row itself rather than hard-coded, so this stays correct if `user_like` is
# rebuilt for a different user.
# NOTE(review): `user_like` holds movies this user has ALREADY rated >= 4.5, so
# this ranks known favourites by predicted score; it does not suggest unseen
# movies. To recommend new titles, score the movieIds the user has not rated.
user_would_like = user_like.copy()
user_would_like['estimate_score'] = [
    svd.predict(uid, iid).est
    for uid, iid in zip(user_would_like['userId'], user_would_like['movieId'])
]

# Highest estimated score first; show the top five.
user_would_like = user_would_like.sort_values('estimate_score', ascending=False)
user_would_like.head()

Unnamed: 0,userId,movieId,title,rating,estimate_score
24476,1,7153,"Lord of the Rings: The Return of the King, The...",5.0,4.692548
22618,1,4993,"Lord of the Rings: The Fellowship of the Ring,...",5.0,4.442688
23353,1,5952,"Lord of the Rings: The Two Towers, The (2002)",5.0,4.3995
9407,1,1196,Star Wars: Episode V - The Empire Strikes Back...,4.5,4.369927
9874,1,1198,Raiders of the Lost Ark (Indiana Jones and the...,4.5,4.213067
