In [40]:
import pandas as pd
from surprise import Dataset,Reader
from surprise.model_selection import train_test_split
from surprise import SVD, SVDpp, NMF
from surprise import accuracy
# Load the MovieLens dataset (ml-latest-small)
movies=pd.read_csv('ml-latest-small/movies.csv')
ratings=pd.read_csv('ml-latest-small/ratings.csv')
print(movies.info())
print(movies.shape)
print(ratings.info())
print(ratings.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   title    9742 non-null   object
 2   genres   9742 non-null   object
dtypes: int64(1), object(2)
memory usage: 228.5+ KB
None
(9742, 3)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB
None
(100836, 4)


In [17]:
# Merge movies and ratings data
movie_ratings=pd.merge(ratings,movies,on='movieId')
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [19]:
# Create a Surprise Reader
reader=Reader(rating_scale=(1,5))
reader

<surprise.reader.Reader at 0x19a1303b940>

In [20]:
# Load the data into Surprise Dataset
surpised_data=Dataset.load_from_df(movie_ratings[['userId','movieId','rating']],reader)
surpised_data

<surprise.dataset.DatasetAutoFolds at 0x19a13039690>

In [29]:
# Split the data into training and testing sets
trainset,testset=train_test_split(surpised_data,test_size=0.2,random_state=42)
print(trainset)
print(testset[:5])
print(len(testset))

<surprise.trainset.Trainset object at 0x0000019A1303A110>
[(177, 1288, 3.5), (551, 165549, 1.0), (212, 81834, 4.0), (249, 40815, 3.5), (492, 1363, 4.0)]
20168


In [37]:
# Use the SVD algorithm
svd=SVD()
# Train the algorithm on the training set
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x19a130c1840>

In [38]:
# Use the SVD algorithm
svdpp=SVDpp()
# Train the algorithm on the training set
svdpp.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x19a130c1d20>

In [41]:
# Use the SVD algorithm
nmf=NMF()
# Train the algorithm on the training set
nmf.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x19a13004730>

In [45]:
model = {"svd":svd,"svdpp":svdpp,"nmf":nmf}
for i in model:
    print(i)
    # Make predictions on the test set
    predictions=model[i].test(testset)
    # Evaluate the predictions using RMSE (Root Mean Squared Error)
    rmse=accuracy.rmse(predictions)
    print(f"RMSE on the test set of {i}: {rmse}")

svd
RMSE: 0.8723
RMSE on the test set of svd: 0.8723020745470047
svdpp
RMSE: 0.8616
RMSE on the test set of svdpp: 0.8616124031563398
nmf
RMSE: 0.9221
RMSE on the test set of nmf: 0.9221251598244359


In [46]:
# Get recommendations for a specific user
user_id=1
user_movies=movie_ratings[movie_ratings['userId']==user_id]['movieId'].unique()
# Filter out movies the user has already rated
unrated_movies=movies[~movies['movieId'].isin(user_movies)]['movieId']
# Make predictions for unrated movies
user_predictions=[nmf.predict(user_id,movie_id) for movie_id in unrated_movies]
# Sort predictions by estimated rating in descending order
sorted_predictions=sorted(user_predictions,key=lambda x:x.est,reverse=True)
# Get top 10 movie recommendations
top_recommendations=sorted_predictions[:10]
# Print top recommendations
print(f"\nTop 10 movie recommendations for User {user_id}:")
for recommendation in top_recommendations:
   movie_title=movies[movies['movieId']==recommendation.iid]['title'].values[0]
   print(f"{movie_title} (Estimated Rating: {recommendation.est})")


Top 10 movie recommendations for User 1:
Sense and Sensibility (1995) (Estimated Rating: 5)
Persuasion (1995) (Estimated Rating: 5)
Lamerica (1994) (Estimated Rating: 5)
Angels and Insects (1995) (Estimated Rating: 5)
Heidi Fleiss: Hollywood Madam (1995) (Estimated Rating: 5)
Heavenly Creatures (1994) (Estimated Rating: 5)
Priest (1994) (Estimated Rating: 5)
Three Colors: Red (Trois couleurs: Rouge) (1994) (Estimated Rating: 5)
Three Colors: Blue (Trois couleurs: Bleu) (1993) (Estimated Rating: 5)
Shawshank Redemption, The (1994) (Estimated Rating: 5)
