# Recommendation System for MoveLens Dataset using SVD

In [1]:
# Import libraries
import numpy as np
import pandas as pd

# To load the 'ratings' and 'movies' dataset after uploading them to Jupyter notebook

In [2]:
# Reading ratings file

ratings = pd.read_csv('ratings.csv', usecols=['userId','movieId','rating','timestamp'])

In [3]:
# Reading movies file

movies = pd.read_csv('movies.csv', usecols=['movieId','title','genres'])

In [4]:
# Print first five rows of movies datset

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# Print first five rows of ratings datset

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


# To find the unique number of users and movies in the 'ratings' dataset

In [6]:

n_users = ratings.userId.unique().shape[0]

n_movies = ratings.movieId.unique().shape[0]

print(f'Number of users = {n_users} and Number of movies = {n_movies}')

Number of users = 7120 and Number of movies = 14026


# To create a rating matrix for the 'ratings' dataset

In [7]:
Ratings = ratings.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
Ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,129350,129354,129428,129707,130052,130073,130219,130462,130490,130642
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# To install the scikit-surprise library for implementing SVD

### Run the following command in the Anaconda Prompt to install surprise package

In [None]:
#conda install -c conda-forge scikit-surprise

In [8]:
# Import libraries from Surprise package
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Load Reader library
reader = Reader()

# Load ratings dataset with Dataset library
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Use the SVD algorithm.
svd = SVD()

# Compute the RMSE of the SVD algorithm.
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8450  0.8466  0.8435  0.8451  0.0012  
MAE (testset)     0.6479  0.6480  0.6465  0.6475  0.0006  
Fit time          35.63   35.97   36.04   35.88   0.18    
Test time         3.20    2.93    2.81    2.98    0.16    


{'test_rmse': array([0.84500932, 0.84659537, 0.84354932]),
 'test_mae': array([0.6478572 , 0.64795734, 0.64653998]),
 'fit_time': (35.625919580459595, 35.97109508514404, 36.04439640045166),
 'test_time': (3.202505350112915, 2.930363655090332, 2.807717800140381)}

In [19]:
# Print the head of ratings dataset
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


# To find all the movies rated as more than 4 stars by user with userId = 1

In [10]:
ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]
ratings_1 = ratings_1.set_index('movieId')
ratings_1 = ratings_1.join(movies)['title']
ratings_1.head(10)

movieId
11                    Dracula: Dead and Loving It (1995)
62     Don't Be a Menace to South Central While Drink...
141                                         Gospa (1995)
150                                Addiction, The (1995)
260                             Ladybird Ladybird (1994)
318    Strawberry and Chocolate (Fresa y chocolate) (...
364                                      Maverick (1994)
368                                 Reality Bites (1994)
377                      When a Man Loves a Woman (1994)
380                                   Bad Company (1995)
Name: title, dtype: object

# Train an SVD to predict ratings for user with userId = 1

In [11]:
# Create a shallow copy for the movies dataset
user_5 = movies.copy()

#Reset the index for user_5 dataset
user_5 = user_5.reset_index()



# getting full dataset
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)


#create a training set for svd
trainset = data.build_full_trainset()
svd.fit(trainset)

#Predict the ratings for user1
user_5['Estimate_Score'] = user_5['movieId'].apply(lambda x: svd.predict(1, x).est)

#Drop extra columns from the user1 data frame
user_5 = user_5.drop(['movieId','genres','index'], axis = 1)

# Sort predicted ratings for user1 in descending order
user_5 = user_5.sort_values('Estimate_Score', ascending=False)

#Print top 10 recommendations
print(user_5.head(10))

                                                   title  Estimate_Score
4897   Lord of the Rings: The Fellowship of the Ring,...        4.654343
5853       Lord of the Rings: The Two Towers, The (2002)        4.645059
7041   Lord of the Rings: The Return of the King, The...        4.634700
6501                                   Umberto D. (1952)        4.581027
18990                                Black Mirror (2011)        4.564926
2849                                Lady Eve, The (1941)        4.558957
6873   Passion of Joan of Arc, The (Passion de Jeanne...        4.537914
12795          Encounters at the End of the World (2008)        4.522014
16191  Harry Potter and the Deathly Hallows: Part 1 (...        4.520403
8953                         Bourne Identity, The (1988)        4.499449
