In [52]:
pip install scikit-surprise




In [22]:
import pandas as pd

ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Merge datasets
data = pd.merge(ratings, movies, on='movieId')
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [53]:
from surprise import Reader, Dataset

reader = Reader(rating_scale=(0.5, 5.0))
dataset = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

In [54]:
from surprise import SVD, accuracy
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(dataset, test_size=0.25)

algo = SVD()
algo.fit(trainset)
predictions = algo.test(testset)

In [26]:
accuracy.rmse(predictions)

RMSE: 0.8805


0.8805289322365422

In [48]:
def get_recommendations(user_id, num_recommendations=10):
    # Identify User Ratings
    user_ratings = data[data['userId'] == user_id]
    watched_movie_ids = user_ratings['movieId'].tolist()
    # Find Unwatched Movies
    all_movie_ids = data['movieId'].unique()
    to_predict = [movie_id for movie_id in all_movie_ids if movie_id not in watched_movie_ids]
    # Predict Ratings for Unwatched Movies
    predicted_ratings = [algo.predict(user_id, movie_id).est for movie_id in to_predict]
    # Select Top Recommendations
    top_indices = sorted(range(len(predicted_ratings)), key=lambda i: predicted_ratings[i], reverse=True)[:num_recommendations]
    top_movie_ids = [to_predict[i] for i in top_indices]
    # Return Recommendations
    recommendations = movies[movies['movieId'].isin(top_movie_ids)]
    recommendations['predicted_rating'] = [predicted_ratings[i] for i in top_indices]
    return recommendations

In [51]:
#Sample usage
user_id = 15
recommendations = get_recommendations(user_id)
print(recommendations)

      movieId                                              title  \
46         50                         Usual Suspects, The (1995)   
602       750  Dr. Strangelove or: How I Learned to Stop Worr...   
681       899                         Singin' in the Rain (1952)   
863      1136             Monty Python and the Holy Grail (1975)   
909      1208                              Apocalypse Now (1979)   
920      1219                                      Psycho (1960)   
941      1242                                       Glory (1989)   
957      1258                                Shining, The (1980)   
8466   112552                                    Whiplash (2014)   
9463   168252                                       Logan (2017)   

                        genres  predicted_rating  
46      Crime|Mystery|Thriller          4.373850  
602                 Comedy|War          4.205251  
681     Comedy|Musical|Romance          4.177368  
863   Adventure|Comedy|Fantasy          4.12454

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommendations['predicted_rating'] = [predicted_ratings[i] for i in top_indices]
