In [17]:
!pip install scikit-surprise



In [2]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [3]:
ratings = pd.read_csv('ratings.csv')

In [4]:
ratings.head()

Unnamed: 0,userId,movieId,rating,tstamp
0,206,4803,4.0,2003-04-07 13:52:01
1,5073,72731,4.0,2020-02-19 16:07:53
2,4739,91653,4.0,2020-12-28 15:35:58
3,535,3005,3.0,2008-12-26 05:38:11
4,465,4776,3.0,2008-08-13 20:22:36


In [5]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60985 entries, 0 to 60984
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   userId   60985 non-null  int64  
 1   movieId  60985 non-null  int64  
 2   rating   60985 non-null  float64
 3   tstamp   60985 non-null  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 1.9+ MB


In [6]:
ratings['rating'].value_counts()

4.0    13974
3.5    11620
3.0    10952
4.5     6340
5.0     5387
2.5     4866
2.0     3732
1.0     1485
1.5     1448
0.5     1181
Name: rating, dtype: int64

In [7]:
min_rating = 0.5
max_rating = 5.0

# Create a Surprise Reader object to specify the rating scale
reader = Reader(rating_scale=(min_rating, max_rating))  # Specify the actual rating scale used in your data

# Load the Pandas DataFrame into a Surprise Dataset
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=.2)


In [8]:
algo = SVD()

In [9]:
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x78c1035ad1b0>

In [10]:
predictions = algo.test(testset)
accuracy.rmse(predictions)

RMSE: 0.8817


0.8817387884926517

In [11]:
def predict_rating(user_id, movie_id):
    prediction = algo.predict(user_id, movie_id)
    return prediction.est

In [12]:
def recommend_movies(user_id):
    # Get a list of all the movies the user has not rated
    all_movies = ratings['movieId'].unique()
    user_movies = ratings[ratings['userId'] == user_id]['movieId'].unique()
    new_movies = list(set(all_movies) - set(user_movies))

    # Predict the ratings for the new movies
    predictions = [algo.predict(user_id, movie_id) for movie_id in new_movies]

    # Sort the predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    # Get the top 10 recommendations
    top_recommendations = [prediction.iid for prediction in predictions[:10]]

    return top_recommendations

In [13]:
movie_ids=recommend_movies(206)

In [14]:
df=ratings = pd.read_csv('movies.csv')

In [15]:
df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [16]:
filtered_df = df[df['movieId'].isin(movie_ids)]

# Iterate over the filtered DataFrame and print the title and genres for each movie
for index, row in filtered_df.iterrows():
    print(f"Movie ID: {row['movieId']}")
    print(f"Title: {row['title']}")
    print(f"Genres: {row['genres']}")
    print('-' * 30)

Movie ID: 296
Title: Pulp Fiction (1994)
Genres: Comedy|Crime|Drama|Thriller
------------------------------
Movie ID: 858
Title: Godfather, The (1972)
Genres: Crime|Drama
------------------------------
Movie ID: 1213
Title: Goodfellas (1990)
Genres: Crime|Drama
------------------------------
Movie ID: 1961
Title: Rain Man (1988)
Genres: Drama
------------------------------
Movie ID: 2571
Title: Matrix, The (1999)
Genres: Action|Sci-Fi|Thriller
------------------------------
Movie ID: 2959
Title: Fight Club (1999)
Genres: Action|Crime|Drama|Thriller
------------------------------
Movie ID: 5618
Title: Spirited Away (Sen to Chihiro no kamikakushi) (2001)
Genres: Adventure|Animation|Fantasy
------------------------------
Movie ID: 8874
Title: Shaun of the Dead (2004)
Genres: Comedy|Horror
------------------------------
Movie ID: 44199
Title: Inside Man (2006)
Genres: Crime|Drama|Thriller
------------------------------
Movie ID: 79132
Title: Inception (2010)
Genres: Action|Crime|Drama|Myst