# setup packages and load data

In [1]:
import numpy as np
import pandas as pd

In [2]:
location = "D:/py_movie_recommendation_system/data/"
movies_df = pd.read_csv(location+"movies.csv")
ratings_df = pd.read_csv(location+"ratings.csv")
links_df = pd.read_csv(location+"links.csv")
tags_df = pd.read_csv(location+"tags.csv")

# data preprocessing

In [3]:
# drop useless column
movie_ratings=ratings_df.drop('timestamp', axis=1)

In [4]:
movie_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [5]:
# convert data type
user_movie = movie_ratings[["userId", "movieId"]].astype(int)
rating = movie_ratings["rating"].astype(float)
movie_ratings = pd.concat([user_movie, rating], axis=1)

In [6]:
movie_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


# train test split

In [29]:
from surprise import Reader, Dataset
from surprise.model_selection import train_test_split
reader = Reader()
data = Dataset.load_from_df(movie_ratings[["userId", "movieId", "rating"]], reader)

In [30]:
train, test = train_test_split(data, test_size=0.2)

# define and tune model

In [32]:
from surprise import SVD
from surprise.model_selection import GridSearchCV

param_grid = {'n_factors': [25, 50, 100, 150, 200],'n_epochs': [10, 20, 30, 40, 50], 'lr_all': [0.001, 0.005, 0.01, 0.05, 0.1],'reg_all':[0.005, 0.001, 0.05, 0.01, 0.1]}

gscv = GridSearchCV(SVD, param_grid, measures=["rmse"], cv=5)
gscv.fit(data)

In [33]:
params = gscv.best_params['rmse']
svd = SVD(n_factors=params['n_factors'], n_epochs=params['n_epochs'],lr_all=params['lr_all'], reg_all=params['reg_all'])

# test best model

In [34]:
from surprise import accuracy
predictions = svd.fit(train).test(test)
accuracy.rmse(predictions, verbose=True)

RMSE: 0.8521


0.8521330060166512

In [36]:
# save model
from surprise import dump
dump.dump("D:/py_movie_recommendation_system/EDA&Model/svd", algo=svd)

# qualitative check

In [82]:
# get recommendation for user
def get_top_n(best_model, uid, n=10, location="D:/py_movie_recommendation_system/data/"):
    movies_df = pd.read_csv(location+"movies.csv")
    ratings_df = pd.read_csv(location+"ratings.csv").drop("timestamp", axis=1)
    users = list(ratings_df.userId.unique())
    if uid not in users:
        print(f"user {uid} is not in the database.")
        print("Here are the top reviewed movies:")
        top = ratings_df[["movieId", "rating"]].groupby("movieId").count().reset_index().sort_values("rating", ascending=False).reset_index(drop=True).drop("rating", axis=1)
        all_top = pd.merge(top, movies_df, left_on="movieId", right_on="movieId").drop("movieId", axis=1)
        recommendation = all_top.head(n).reset_index(drop=True)
        print(recommendation)
        return recommendation
    movies_df["estimate_rating"] = movies_df["movieId"].apply(lambda x: best_model.predict(uid, x).est)
    user_recommendation = movies_df.drop("movieId", axis=1)
    user_recommendation = user_recommendation.sort_values("estimate_rating", ascending=False)
    recommendation = user_recommendation.head(n).reset_index(drop=True)
    print(recommendation)
    return recommendation

In [85]:
# for known user
get_top_n(svd, 1, 10)

                                               title  \
0                                         Ran (1985)   
1                            Five Easy Pieces (1970)   
2                            Band of Brothers (2001)   
3                   Shawshank Redemption, The (1994)   
4  Man Bites Dog (C'est arrivé près de chez vous)...   
5                                Hustler, The (1961)   
6                                   Jetée, La (1962)   
7                              Secrets & Lies (1996)   
8   Three Billboards Outside Ebbing, Missouri (2017)   
9                            Harold and Maude (1971)   

                        genres  estimate_rating  
0                    Drama|War         5.000000  
1                        Drama         5.000000  
2             Action|Drama|War         5.000000  
3                  Crime|Drama         5.000000  
4  Comedy|Crime|Drama|Thriller         5.000000  
5                        Drama         5.000000  
6               Romance|Sci-Fi   

Unnamed: 0,title,genres,estimate_rating
0,Ran (1985),Drama|War,5.0
1,Five Easy Pieces (1970),Drama,5.0
2,Band of Brothers (2001),Action|Drama|War,5.0
3,"Shawshank Redemption, The (1994)",Crime|Drama,5.0
4,Man Bites Dog (C'est arrivé près de chez vous)...,Comedy|Crime|Drama|Thriller,5.0
5,"Hustler, The (1961)",Drama,5.0
6,"Jetée, La (1962)",Romance|Sci-Fi,5.0
7,Secrets & Lies (1996),Drama,5.0
8,"Three Billboards Outside Ebbing, Missouri (2017)",Crime|Drama,5.0
9,Harold and Maude (1971),Comedy|Drama|Romance,4.998473


In [86]:
# for unknown user
get_top_n(svd, 1111111, 10)

user 1111111 is not in the database.
Here are the top reviewed movies:
                                       title                            genres
0                        Forrest Gump (1994)          Comedy|Drama|Romance|War
1           Shawshank Redemption, The (1994)                       Crime|Drama
2                        Pulp Fiction (1994)       Comedy|Crime|Drama|Thriller
3           Silence of the Lambs, The (1991)             Crime|Horror|Thriller
4                         Matrix, The (1999)            Action|Sci-Fi|Thriller
5  Star Wars: Episode IV - A New Hope (1977)           Action|Adventure|Sci-Fi
6                       Jurassic Park (1993)  Action|Adventure|Sci-Fi|Thriller
7                          Braveheart (1995)                  Action|Drama|War
8          Terminator 2: Judgment Day (1991)                     Action|Sci-Fi
9                    Schindler's List (1993)                         Drama|War


Unnamed: 0,title,genres
0,Forrest Gump (1994),Comedy|Drama|Romance|War
1,"Shawshank Redemption, The (1994)",Crime|Drama
2,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
3,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
4,"Matrix, The (1999)",Action|Sci-Fi|Thriller
5,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
6,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller
7,Braveheart (1995),Action|Drama|War
8,Terminator 2: Judgment Day (1991),Action|Sci-Fi
9,Schindler's List (1993),Drama|War


In [None]:
#