<a href="https://colab.research.google.com/github/pjmndrll/Movie-Recommender-System/blob/main/Recommender_Systems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162993 sha256=6b804d2e63b31aeefb9f3d7e4443ec369a04dc34e1ffbf3239106c888899e6ac
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [3]:
# Import libraries
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [4]:
# Specify the file paths
movies_file_path = '/content/movies.csv'
ratings_file_path = '/content/ratings.csv'

# Read the CSV files into DataFrames
df_movies = pd.read_csv(movies_file_path)
df_ratings = pd.read_csv(ratings_file_path)


In [5]:
df_movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy
10325,146878,Le Grand Restaurant (1966),Comedy
10326,148238,A Very Murray Christmas (2015),Comedy
10327,148626,The Big Short (2015),Drama


In [6]:
df_ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523
...,...,...,...,...
105334,668,142488,4.0,1451535844
105335,668,142507,3.5,1451535889
105336,668,143385,4.0,1446388585
105337,668,144976,2.5,1448656898


In [7]:
# Merge the DataFrames on 'movieId'
df = pd.merge(df_ratings, df_movies, on='movieId', how='left')
df


Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,1,24,1.5,1217895807,Powder (1995),Drama|Sci-Fi
2,1,32,4.0,1217896246,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
3,1,47,4.0,1217896556,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,4.0,1217896523,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
...,...,...,...,...,...,...
105334,668,142488,4.0,1451535844,Spotlight (2015),Thriller
105335,668,142507,3.5,1451535889,Pawn Sacrifice (2015),Drama
105336,668,143385,4.0,1446388585,Bridge of Spies (2015),Drama|Thriller
105337,668,144976,2.5,1448656898,Bone Tomahawk (2015),Horror|Western


In [10]:
# Convert DataFrame to Surprise Dataset
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)


In [11]:
# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2)

In [12]:
# Use the KNNBasic collaborative filtering algorithm
sim_options = {
    'name': 'cosine',    # Compute similarities between items using cosine similarity
    'user_based': False  # Use item-based collaborative filtering
}
algo = KNNBasic(sim_options=sim_options)

# Train the algorithm on the train set
algo.fit(trainset)



Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fa007ac2080>

In [13]:
# Make predictions
predictions = algo.test(testset)


In [14]:
# Evaluate the model
rmse = accuracy.rmse(predictions)
print("RMSE:", rmse)

RMSE: 0.9750
RMSE: 0.9750066160096682


In [15]:
# Get the distinct movie titles corresponding to movieIds
movie_titles = df[['movieId', 'title']].drop_duplicates()


In [16]:
# Recommend top N movies to a user
user_id = 1  # User ID can be replaced
top_n = 10  # Number of movies to recommend can be replaced



In [17]:
# Create a dictionary to store recommendations
user_recommendations = {}
for movie_id, _, _, estimated_rating, _ in predictions:
    user_recommendations[movie_id] = estimated_rating

In [18]:
# Sort recommendations by estimated rating
sorted_recommendations = sorted(user_recommendations.items(), key=lambda x: x[1], reverse=True)

# Get top N recommendations with movie titles
top_n_recommendations = []
for movie_id, estimated_rating in sorted_recommendations[:top_n]:
    movie_title = movie_titles.loc[movie_titles['movieId'] == movie_id, 'title']
    if not movie_title.empty:
        top_n_recommendations.append((movie_id, movie_title.values[0], estimated_rating))

# Print top N recommendations
print(f"\nTop {top_n} recommended movies for user {user_id}:")
for movie_id, movie_title, estimated_rating in top_n_recommendations:
    print(f"Movie ID: {movie_id}, Title: {movie_title}, Estimated Rating: {estimated_rating}")


Top 10 recommended movies for user 1:
Movie ID: 78, Title: Crossing Guard, The (1995), Estimated Rating: 5
Movie ID: 550, Title: Threesome (1994), Estimated Rating: 4.873058443042511
Movie ID: 15, Title: Cutthroat Island (1995), Estimated Rating: 4.799142703201941
Movie ID: 34, Title: Babe (1995), Estimated Rating: 4.714285714285714
Movie ID: 637, Title: Sgt. Bilko (1996), Estimated Rating: 4.700159681855543
Movie ID: 144, Title: Brothers McMullen, The (1995), Estimated Rating: 4.6990316283034295
Movie ID: 520, Title: Robin Hood: Men in Tights (1993), Estimated Rating: 4.680343465017675
Movie ID: 432, Title: City Slickers II: The Legend of Curly's Gold (1994), Estimated Rating: 4.649227631999519
Movie ID: 389, Title: Colonel Chabert, Le (1994), Estimated Rating: 4.625
