Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split

Load and Explore the Data

In [2]:
# Load data files
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

# Check first few rows of each
print(movies.head())
print(ratings.head())


   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


Data Preprocessing

In [3]:
# Merge movies and ratings on movieId
movie_data = pd.merge(ratings, movies, on='movieId')

# Check for missing values
print(movie_data.isnull().sum())


userId       0
movieId      0
rating       0
timestamp    0
title        0
genres       0
dtype: int64


Train the Recommendation Model

In [4]:
# Setup the dataset for Surprise
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

# Initialize and train the SVD algorithm
algo = SVD()
algo.fit(trainset)

# Make predictions
predictions = algo.test(testset)

Model Evaluation

In [5]:
# Calculate RMSE
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")


RMSE: 0.8764
RMSE: 0.876395625861118


Making Recommendations

In [6]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions."""
    # Map the predictions to each user.
    top_n = {}
    for uid, iid, true_r, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))

    # Sort the predictions for each user and retrieve the n highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Get top 10 recommendations for each user
top_n = get_top_n(predictions, n=10)


Optional Deployment with Streamlit

In [7]:
import streamlit as st

st.title("Movie Recommendation System")

user_id = st.number_input("Enter user ID to get recommendations", min_value=1, step=1)

if st.button("Get Recommendations"):
    # Display recommendations for the user
    if user_id in top_n:
        st.write(f"Top recommendations for User {user_id}:")
        for movie_id, rating in top_n[user_id]:
            movie_name = movies[movies['movieId'] == movie_id]['title'].values[0]
            st.write(f"{movie_name} (Predicted Rating: {rating:.2f})")
    else:
        st.write("No recommendations found for this user.")


2024-11-11 10:49:15.564 
  command:

    streamlit run /Users/kirankumar/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
