# Recommender System
-- Movie recommendation

In [None]:
import pandas as pd
from surprise import SVD
from surprise import KNNBaseline
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
from surprise import Reader
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from collections import defaultdict

In [None]:
# Input CSV locations, relative to the notebook's working directory.
movie_file = '../data/movies_recommender.csv'
ratings_file = '../data/ratings_recommender.csv'

# Movie table: load it, then show its dimensions, columns and first rows.
movies = pd.read_csv(movie_file)
print('Movie data shape: ', movies.shape)
print('Movie data columns: ', movies.columns)
print('Movies head: \n', movies.head(3))

# Ratings table: same quick inspection as for the movie table.
ratings = pd.read_csv(ratings_file)
print('\n\nRating data shape: ', ratings.shape)
print('Rating data columns: ', ratings.columns)
print('Ratings head: \n', ratings.head(3))

### Data Merge

In [None]:
# Inner join on movieId: only rows whose movieId appears in both tables are
# kept, so every remaining row pairs a rating with its movie's columns.
df = movies.merge(ratings, on='movieId', how='inner')
print('Merged dataframe shape: ', df.shape)
df.head(5)

In [None]:
# Summary statistics of the merged frame, transposed so that each described
# column becomes a row and each statistic a column.
df.describe().T

In [None]:
# Get the min and max rating actually present in the data.
# Reduce the 'rating' column directly: the previous form,
# `df[['rating']].min()[0]`, looked up integer position 0 on a Series whose
# index is the label 'rating', which newer pandas versions deprecate/reject.
rating_min = df['rating'].min()
rating_max = df['rating'].max()
print('Min rating', rating_min)
print('Max rating', rating_max)

# The Reader tells Surprise the rating scale; the dataset is built from the
# (user, item, rating) columns in exactly that order.
reader = Reader(rating_scale=(rating_min, rating_max))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
print('Data shape: ', data.df.shape)
data.df.head(5)

### Build the model

In [None]:
# Split: hold out 25% of the ratings as a test set; random_state is fixed so
# the same split is produced on every run.
trainSet, testSet = train_test_split(data, test_size=.25, random_state=0)

# Model training: fit an SVD model (seeded with random_state=0) on the
# training portion only.
algo = SVD(random_state=0)
algo.fit(trainSet)

# Predictions: run the fitted model over the held-out test set.
predictions = algo.test(testSet)

In [None]:
def MAE(predictions):
    """Return the mean absolute error of ``predictions``.

    Thin wrapper around ``accuracy.mae`` called with ``verbose=False``; the
    computed value is handed back to the caller.
    """
    mean_abs_error = accuracy.mae(predictions, verbose=False)
    return mean_abs_error
def RMSE(predictions):
    """Return the root mean squared error of ``predictions``.

    Thin wrapper around ``accuracy.rmse`` called with ``verbose=False``; the
    computed value is handed back to the caller.
    """
    root_mean_sq_error = accuracy.rmse(predictions, verbose=False)
    return root_mean_sq_error
    
# Report both error metrics computed over `predictions`.
print("RMSE: ", RMSE(predictions))
print("MAE: ", MAE(predictions))

In [None]:
def GetTopN(predictions, n=10, minimumRating=4.0):
    """Group predictions per user and keep each user's best ``n`` items.

    Args:
        predictions: iterable of 5-field records in the order
            (user id, movie id, actual rating, estimated rating, details).
            Only the ids and the estimated rating are used.
        n: maximum number of items kept per user.
        minimumRating: predictions whose estimated rating is below this
            value are ignored.

    Returns:
        A ``defaultdict(list)`` mapping ``int(user id)`` to a list of
        ``(int(movie id), estimated rating)`` tuples, sorted by estimated
        rating in descending order and truncated to ``n`` entries. Users
        without any qualifying prediction are absent from the mapping.
    """
    per_user = defaultdict(list)
    for uid, iid, _actual, estimate, _details in predictions:
        # Skip anything that does not reach the threshold.
        if not (estimate >= minimumRating):
            continue
        per_user[int(uid)].append((int(iid), estimate))

    # Only existing keys are reassigned, so the mapping's size never changes
    # while it is being iterated.
    for uid in per_user:
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]

    return per_user

In [None]:
# Build the per-user top-N lists from the predictions.
topN = GetTopN(predictions)
# Sanity check: print the recommendations of the first user only; the
# trailing `break` leaves the loop after one iteration.
for i,j in topN.items():
    print ('user_id: ', i)
    for item in j:
        # item is a (movie_id, predicted_rating) tuple as built by GetTopN
        print ('\tmovie_id:', item[0], '\tpredicted rating:', item[1])
    break

### Check one user

In [None]:
# Id of the single user inspected in the following cells.
user_id = 47

In [None]:
# Rows of the merged frame where this user gave a rating of at least 5.0;
# the timestamp column is dropped because it is not needed for display.
df_user = df[(df['userId'] == user_id) & (df['rating'] >= 5.0)]
df_user = df_user.drop(columns='timestamp')
print('Watched Movies + Rating 5.0')
df_user.head(10)

In [None]:
# Collect title and genre for every movie recommended to this user.
# NOTE(review): columns are read by position (index 1 -> title, index 2 ->
# genre); this presumes the column order of the movies file - confirm.
movie_list = []
genre_list = []
for movie_id, pred_rating in topN[user_id]:
    # Filter the movie table once per recommendation and reuse the matching
    # row, instead of repeating the same scan for each field.
    movie_row = movies[movies['movieId'] == movie_id].values[0]
    title, genre = movie_row[1], movie_row[2]
    movie_list.append(title)
    genre_list.append(genre)

# One (title, genre) row per recommendation, in ranking order.
data_tuples = list(zip(movie_list, genre_list))
reco_df = pd.DataFrame(data_tuples, columns=['Title', 'Genre'])
print('Recommended Movies')
reco_df.head(10)