## Recommend with Collaborative Filtering

In [131]:
import numpy as np
import pandas as pd
from surprise import Reader, Dataset, SVD, evaluate

In [132]:
# Read the movie catalogue and the user ratings from CSV files located in the
# current working directory.
movie, rating = (pd.read_csv(csv_name) for csv_name in ('movie.csv', 'rating.csv'))

In [133]:
# Preview the first five rows of the movie table.
movie.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [134]:
# Remove the leftover index column that was written into the CSV.
# The frame is rebound instead of mutated in place, and errors='ignore' makes
# the cell idempotent: running it a second time no longer raises KeyError
# because the column has already been removed.
rating = rating.drop('Unnamed: 0', axis=1, errors='ignore')
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [135]:
# Attach each rating's movie columns (title, genres) by joining on movieId.
# No `how` is given, so pandas' default join type applies.
combined_data = rating.merge(movie, on='movieId')
combined_data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,5,2,3.0,1996-12-25 15:26:09,Jumanji (1995),Adventure|Children|Fantasy
2,13,2,3.0,1996-11-27 08:19:02,Jumanji (1995),Adventure|Children|Fantasy
3,29,2,3.0,1996-06-23 20:36:14,Jumanji (1995),Adventure|Children|Fantasy
4,34,2,3.0,1996-10-28 13:29:44,Jumanji (1995),Adventure|Children|Fantasy


### What movies did user 9 like most in the past?

In [151]:
# All rows belonging to user 9.
user = combined_data.loc[combined_data['userId'] == 9]
# Keep only the movies this user rated 4.5 or higher, with the columns of interest.
user_like = user.loc[user['rating'] >= 4.5, ['movieId', 'title', 'rating']]
user_like

Unnamed: 0,movieId,title,rating
15565,1997,"Exorcist, The (1973)",5.0
19285,2959,Fight Club (1999),5.0
34899,858,"Godfather, The (1972)",5.0
72893,3798,What Lies Beneath (2000),5.0
82624,4148,Hannibal (2001),5.0


### Let's predict the top 5 movies user 9 would love to watch:

In [143]:
# Train an SVD model on every (userId, movieId, rating) triple in combined_data.
# NOTE(review): Reader() is left at its library defaults -- confirm that the
# default rating scale covers every rating value present in this data.
# NOTE(review): no random seed is set here, so the estimated scores may differ
# between runs -- confirm whether reproducible output is required.
reader = Reader()
data = Dataset.load_from_df(combined_data[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()
svd = SVD()
svd.fit(trainset)

# Estimate user 9's score for every movie in the catalogue, then order the
# catalogue from the highest to the lowest estimate.
user_would_like = (
    movie[['movieId', 'title']]
    .assign(
        estimate_score=lambda frame: frame['movieId'].apply(
            lambda movie_id: svd.predict(9, movie_id).est
        )
    )
    .sort_values('estimate_score', ascending=False)
)

In [144]:
# Show the five highest-scored movies (the frame is already sorted best-first).
user_would_like.iloc[:5]

Unnamed: 0,movieId,title,estimate_score
523,527,Schindler's List (1993),4.404483
1181,1207,To Kill a Mockingbird (1962),4.361514
5779,5878,Talk to Her (Hable con Ella) (2002),4.327903
6274,6380,Capturing the Friedmans (2003),4.327355
257,260,Star Wars: Episode IV - A New Hope (1977),4.309747


### Create a user-defined function to make the recommendation reusable

In [149]:
def movie_recommender(userID, top_n=5, model=None, exclude_rated=False):
    """Return the movies with the highest estimated score for one user.

    Every movie in the notebook-level ``movie`` frame is scored with
    ``model.predict(userID, movieId).est`` and the best ``top_n`` rows are
    returned.

    Parameters
    ----------
    userID
        User id passed to ``model.predict`` and compared against
        ``combined_data.userId``.
    top_n : int, default 5
        Number of rows to return.
    model : optional
        Any object exposing ``predict(uid, iid)`` whose result has an ``est``
        attribute (for example an already fitted ``SVD``). When ``None`` a
        new ``SVD`` is fitted on all of ``combined_data`` for this call, which
        repeats the full training on every invocation; pass a fitted model to
        avoid that cost.
    exclude_rated : bool, default False
        When ``True``, movies for which ``userID`` already has a row in
        ``combined_data`` are removed before scoring.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``estimate_score``, sorted by
        ``estimate_score`` in descending order and truncated to ``top_n`` rows.
    """
    if model is None:
        # NOTE(review): Reader() is left at its library defaults -- confirm
        # that the default rating scale covers the ratings in combined_data.
        data = Dataset.load_from_df(combined_data[['userId', 'movieId', 'rating']], Reader())
        model = SVD()
        model.fit(data.build_full_trainset())

    candidates = movie[['movieId', 'title']].copy()
    if exclude_rated:
        # Drop what the user has already rated before scoring, so no
        # predictions are computed for rows that would be discarded anyway.
        rated_ids = combined_data.loc[combined_data.userId == userID, 'movieId']
        candidates = candidates[~candidates['movieId'].isin(rated_ids)].copy()

    candidates['estimate_score'] = candidates['movieId'].apply(
        lambda movie_id: model.predict(userID, movie_id).est
    )
    return candidates.sort_values('estimate_score', ascending=False).head(top_n)

In [150]:
# Top 5 movies with the highest estimated score for user 100.
movie_recommender(userID=100)

Unnamed: 0,movieId,title,estimate_score
49,50,"Usual Suspects, The (1995)",4.41025
7356,7502,Band of Brothers (2001),4.403445
9746,31658,Howl's Moving Castle (Hauru no ugoku shiro) (2...,4.370993
4132,4226,Memento (2000),4.356589
2873,2959,Fight Club (1999),4.345627
