# Movie Recommendation System using SVD
This project aims to create a movie recommendation system using the Singular Value Decomposition (SVD) algorithm. The dataset used for this project is the MovieLens dataset, which consists of movie ratings provided by users.

In [22]:
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, GridSearchCV
from surprise import accuracy

In [23]:
# Load the MovieLens dataset (Update the file paths as needed)
ratings_df = pd.read_csv('ratings.csv')
movies_df = pd.read_csv('movies.csv')

# Create a Reader object for the dataset
reader = Reader(rating_scale=(1, 5))

# Load the dataset into a Surprise Dataset object
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

In [24]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [25]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [26]:
print(ratings_df.shape, movies_df.shape)

(100836, 4) (9742, 3)


In [27]:
print(ratings_df.isnull().sum())
print("#"*20)
print(movies_df.isnull().sum())

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64
####################
movieId    0
title      0
genres     0
dtype: int64


In [28]:
print(ratings_df['rating'].min())
print(ratings_df['rating'].max())

0.5
5.0


In [29]:
# Create a Reader object for the dataset
reader = Reader(rating_scale=(0.5, 5))

# Load the dataset into a Surprise Dataset object
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

In [30]:
# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [31]:
# Define hyperparameter search space
param_grid = {
    'n_factors': [10, 50, 100],
    'n_epochs': [20, 30],
    'lr_all': [0.005, 0.01],
    'reg_all': [0.02, 0.1]
}

In [32]:
# Perform grid search with cross-validation
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=5, n_jobs=-1)
gs.fit(data)

# Print best parameters and corresponding RMSE
print("Best RMSE:", gs.best_score['rmse'])
print("Best parameters:", gs.best_params['rmse'])

Best RMSE: 0.8551709629369253
Best parameters: {'n_factors': 100, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}


In [33]:
# Train the SVD model using the best parameters
best_model = gs.best_estimator['rmse']
best_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f89e93845e0>

In [34]:
# Make predictions on the test set
predictions = best_model.test(testset)

In [35]:
predictions[:5]

[Prediction(uid=140, iid=6765, r_ui=3.5, est=3.291238604125512, details={'was_impossible': False}),
 Prediction(uid=603, iid=290, r_ui=4.0, est=3.885790929193444, details={'was_impossible': False}),
 Prediction(uid=438, iid=5055, r_ui=4.0, est=2.9493032390092244, details={'was_impossible': False}),
 Prediction(uid=433, iid=164179, r_ui=5.0, est=3.605533065635923, details={'was_impossible': False}),
 Prediction(uid=474, iid=5114, r_ui=4.0, est=3.343434355682305, details={'was_impossible': False})]

In [36]:
# Compute evaluation metrics
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

print("Test RMSE:", rmse)
print("Test MAE:", mae)

RMSE: 0.8619
MAE:  0.6614
Test RMSE: 0.8618595136462218
Test MAE: 0.6613946629108804


In [37]:
# Create a DataFrame to store predictions with movie titles
movie_id_to_title = {row['movieId']: row['title'] for _, row in movies_df.iterrows()}
predictions_df = pd.DataFrame(predictions, columns=['uid', 'iid', 'r_ui', 'est', 'details'])
predictions_df['title'] = predictions_df['iid'].map(movie_id_to_title)

In [38]:
predictions_df.head()

Unnamed: 0,uid,iid,r_ui,est,details,title
0,140,6765,3.5,3.291239,{'was_impossible': False},Under the Tuscan Sun (2003)
1,603,290,4.0,3.885791,{'was_impossible': False},Once Were Warriors (1994)
2,438,5055,4.0,2.949303,{'was_impossible': False},Dragon: The Bruce Lee Story (1993)
3,433,164179,5.0,3.605533,{'was_impossible': False},Arrival (2016)
4,474,5114,4.0,3.343434,{'was_impossible': False},"Bad and the Beautiful, The (1952)"
