In [14]:
# import libraries

import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [15]:
# Read the movie metadata and the user ratings from CSV files in the working directory.
movies_df = pd.read_csv("movies.csv")
ratings_df = pd.read_csv("ratings.csv")

In [16]:
# Preview the first five rows of the movies table.
movies_df.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [17]:
# Preview the first five rows of the ratings table.
ratings_df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [18]:
# Join every rating to its movie's metadata on movieId; the inner join keeps
# only movies that have at least one rating (and ratings whose movie is known).
df = movies_df.merge(ratings_df, on='movieId', how='inner')
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [19]:
# A user could in principle have rated the same movie more than once, so count
# the ratings per (userId, movieId) pair and keep only the pairs seen repeatedly.
groupby_movie_userid = (
    df.groupby(['userId', 'movieId'])
    .size()
    .reset_index(name='count')
)
multiple_rating = groupby_movie_userid.loc[groupby_movie_userid['count'].gt(1)]
multiple_rating

Unnamed: 0,userId,movieId,count


There are no cases where a user has rated the same movie more than once.

In [20]:
# Show the smallest and the largest rating in the data, one per line.
print(ratings_df['rating'].min(), ratings_df['rating'].max(), sep="\n")

0.5
5.0


The observed ratings range from 0.5 to 5.0, so we use this range as the rating scale when configuring the Surprise `Reader`.

In [21]:
# Create the Reader object that tells Surprise the bounds of the rating scale.
rating_bounds = (0.5, 5)
reader = Reader(rating_scale=rating_bounds)

In [22]:
# Load the ratings into Surprise's data format; the columns are passed in the
# order user id, item id, rating.
rating_columns = ['userId', 'movieId', 'rating']
data = Dataset.load_from_df(df.loc[:, rating_columns], reader=reader)

In [23]:
# Split the data into training (80%) and testing (20%) sets.
# The split is shuffled at random, so the seed is fixed to make every
# Restart & Run All produce the same trainset/testset.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [24]:
# Use the SVD matrix-factorisation algorithm for collaborative filtering.
# SVD starts from randomly initialised factors, so the seed is fixed to make
# the fitted model (and everything derived from it) reproducible.
model = SVD(random_state=42)

# Train the algorithm on the training set.
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f91633814f0>

In [26]:
#recommender function

def get_recommendations(movie_title, num_recommendations):
    
    #since users can enter movie name in any case, 
    #we will try to make the recommender understand both upper and lower case
    
    #convert movie title to lowercase for case insensitivity
    movie_title = movie_title.lower()
    
    #check if the movie exists in the dataset
    if movie_title not in movies_df['title'].str.lower().unique():
        return "Movie not found."
    
    # Get the movie ID for the given movie title
    movie_id = movies_df.loc[movies_df['title'].str.lower() == movie_title, 'movieId'].values[0]
    print(f"here{movie_id}")
    
    # Get the inner movie ID from the Surprise movie ID
    inner_movie_id = trainset.to_inner_iid(movie_id)
    print(f"here again{inner_movie_id}")
    
    # Predict ratings for all movies for the given user
    movie_ratings = []
    for movie in movies_df['movieId'].unique():
        inner_movie = trainset.to_inner_iid(movie)
        print(f"inner_movie {inner_movie_id}")
        predicted_rating = model.predict(uid=trainset.to_inner_uid(0), iid=inner_movie).est
        movie_ratings.append((movie, predicted_rating))
    
    #sorting the movies based on predicted ratings
    movie_ratings.sort(key=lambda x: x[1], reverse=True)
    
    # Get the top movies with highest predicted ratings
    top_movies = movie_ratings[:num_recommendations]
    
    # Convert movie IDs to movie titles
    recommended_movies = movies_df[movies_df['movieId'].isin([movie[0] for movie in top_movies])]['title']
    
    return recommended_movies

In [27]:
# Example usage: ask for a movie title and request ten recommendations.
movie_title = input("Enter a movie name: ")
num_recommendations = 10
recommendations = get_recommendations(movie_title, num_recommendations)

# The result is either an error message (str) or the recommended titles;
# both are printed as-is.
print(recommendations)

Enter a movie name: Chinatown (1974)
here1252
here again34
inner_movie 34


ValueError: User 0 is not part of the trainset.