### Movie Recommender System

Using the small MovieLens data set, create a recommender system that allows users to input a movie they like (in the data set) and recommends ten other movies for them to watch. 

In [10]:
#importing libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [11]:
#loading the ratings dataset
ratings = pd.read_csv('ratings.csv')

In [12]:
#inspecting the ratings dataset
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [13]:
#loading the movies dataset
movies = pd.read_csv('movies.csv')

In [14]:
#inspecting the movies dataset
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [15]:
#merging the two datasets
df = pd.merge(ratings, movies, on='movieId')

In [16]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [17]:
df.rating.describe()

count    100836.000000
mean          3.501557
std           1.042529
min           0.500000
25%           3.000000
50%           3.500000
75%           4.000000
max           5.000000
Name: rating, dtype: float64

In [18]:
#Splitting the data into training and test sets
train_data, test_data = train_test_split(df, test_size = 0.2, random_state=42)

### Model Selection

I will be using a collaborative filtering model, SVD, from the scikit surprise library. Singular Value Decomposition (SVD) tries to predict the missing interaction on the user-item interaction matrix by performing factorization to produce user latent and item latent factors.

Source: [Surprise Library](https://surpriselib.com/)

In [19]:
#creating a reader object to parse the ratings data
reader = Reader(rating_scale=(0.5, 5))

#loading the training data into the surprise library dataset format
train_dataset = Dataset.load_from_df(train_data[['userId', 'movieId', 'rating']], reader)

In [20]:
#creating the model
model = SVD()

In [21]:
#evaluating the model
cross_validate(model, train_dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8807  0.8895  0.8762  0.8869  0.8794  0.8825  0.0049  
MAE (testset)     0.6777  0.6838  0.6750  0.6828  0.6782  0.6795  0.0033  
Fit time          0.41    0.44    0.50    0.53    0.48    0.47    0.04    
Test time         0.10    0.06    0.11    0.06    0.05    0.07    0.02    


{'test_rmse': array([0.88067586, 0.88953874, 0.8762256 , 0.8868825 , 0.87937269]),
 'test_mae': array([0.67774615, 0.68378513, 0.67499307, 0.68275306, 0.67823907]),
 'fit_time': (0.41016101837158203,
  0.43994593620300293,
  0.49727797508239746,
  0.5255599021911621,
  0.4842832088470459),
 'test_time': (0.09834003448486328,
  0.05512094497680664,
  0.10857295989990234,
  0.056704044342041016,
  0.05471181869506836)}

### Movie Recommendations

In [22]:
def get_recommendations(movie_title, model, movies, data, n=10):
    """ A function to provide movie recommendations"""
    movie_id = movies[movies['title'] == movie_title]['movieId'].iloc[0]
    
    #get the ratings of the input movie
    ratings = data[data['movieId'] == movie_id]
    
    #predict ratings for all movies for the target user (userID=0)
    user_id = 0
    predictions = []
    for movie_id in data['movieId'].unique():
        prediction = model.predict(user_id, movie_id)
        predictions.append((movie_id, prediction.est))
        
    #sorting the predictions
    predictions.sort(key=lambda x: x[1], reverse = True)
    
    #showing the top recommended movie titles
    recommended_movies = []
    for movie_id, _ in predictions[:n]:
        recommended_movie = movies[movies['movieId'] == movie_id]['title'].iloc[0]
        recommended_movies.append(recommended_movie)
    
    return recommended_movies

In [23]:
liked_movie = 'Jumanji (1995)'
recommended_movies = get_recommendations(liked_movie, model, movies, df)

for movie in recommended_movies:
    print(movie)

Lawrence of Arabia (1962)
Shawshank Redemption, The (1994)
Godfather, The (1972)
Manchurian Candidate, The (1962)
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
Departed, The (2006)
Godfather: Part II, The (1974)
Princess Bride, The (1987)
Spirited Away (Sen to Chihiro no kamikakushi) (2001)
Mulholland Drive (2001)
