In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [2]:
moviesDf = pd.DataFrame(pd.read_csv("/kaggle/input/movielens-20m-dataset/movie.csv"))
ratingsDf = pd.DataFrame(pd.read_csv("/kaggle/input/movielens-20m-dataset/rating.csv"))

In [3]:
moviesDf.sample(n=5)

Unnamed: 0,movieId,title,genres
8919,26558,"Stuff, The (1985)",Comedy|Horror|Mystery|Sci-Fi
21529,104419,Justice League: Crisis on Two Earths (2010),Action|Animation|Sci-Fi
14861,74434,"Wounds, The (Rane) (1998)",Comedy|Crime|Drama|War
4613,4708,Marat/Sade (1966),Drama|Musical
26671,128320,Monty Python: Almost the Truth - Lawyers Cut (...,Comedy|Documentary


In [4]:
ratingsDf.sample(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
10346871,71568,8665,3.0,2005-02-22 04:43:29
4282303,29159,1120,3.0,1997-01-21 22:43:27
11728761,80962,3578,5.0,2008-04-30 05:21:13
426699,2908,65261,4.0,2013-06-18 11:24:05
9982109,68999,231,3.0,1996-05-27 08:52:59


In [5]:
Movies= pd.merge(moviesDf[['movieId', 'title', 'genres']], ratingsDf[['userId', 'movieId', 'rating']], on='movieId', how='inner')

In [6]:
Movies.sample(5)

Unnamed: 0,movieId,title,genres,userId,rating
16066416,5952,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy,38149,5.0
11952676,2883,Mumford (1999),Comedy|Drama,78257,2.0
9036263,1917,Armageddon (1998),Action|Romance|Sci-Fi|Thriller,80908,4.0
12053939,2920,Children of Paradise (Les enfants du paradis) ...,Drama|Romance,75659,5.0
2668771,370,Naked Gun 33 1/3: The Final Insult (1994),Action|Comedy,30463,3.0


In [7]:
# Check for missing values
print("Missing Values:\n")
print(Movies.isnull().sum())

Missing Values:

movieId    0
title      0
genres     0
userId     0
rating     0
dtype: int64


In [8]:
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Define Reader format
reader = Reader(rating_scale=(0.5, 5.0))

# Load dataset
data = Dataset.load_from_df(Movies[['movieId', 'userId', 'rating']], reader)

# Train/Test split
trainset, testset = train_test_split(data, test_size=0.2)

# Use SVD model for collaborative filtering
model = SVD()
model.fit(trainset)
predictions = model.test(testset)

# Check accuracy
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")

RMSE: 0.7836
RMSE: 0.7836141396410193


In [19]:
# Function to Recommend Movies
def recommend_movies(user_id, num_recommendations):
    
    all_movie_ids = Movies['movieId'].unique()
    
    rated_movies = Movies[Movies['userId'] == user_id]['movieId'].values
    unrated_movies = [movie for movie in all_movie_ids if movie not in rated_movies]
    
    predictions = [model.predict(user_id, movie_id) for movie_id in unrated_movies]
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    top_movies = predictions[:num_recommendations]
    recommended_titles = [Movies[Movies['movieId'] == pred.iid]['title'].values[0] for pred in top_movies]
    
    return recommended_titles

# Example Usage: Recommend movies for user 1
userID=input("Enter Your UserID: ")
num_recommendation = int(input("Number of recommendations you want= "))
print(f"Recommended Movies are \n",recommend_movies(userID, num_recommendation))

Enter Your UserID:  1234
Number of recommendations you want=  10


Recommended Movies are 
 ['For a Lost Soldier (Voor een Verloren Soldaat) (1992)', 'Those Daring Young Men in Their Jaunty Jalopies (1969)', 'Voyage to the Bottom of the Sea (1961)', 'Little Women (1949)', 'Hijacking, A (Kapringen) (2012)', 'Joe (1970)', 'If Winter Comes (1947)', 'Gates of Heaven (1978)', 'Welcome Farewell-Gutmann (Bienvenido a Farewell-Gutmann) (2008)', 'Roaring Twenties, The (1939)']
