In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
warnings.filterwarnings('ignore')

In [3]:
# Load datasets
ratings = pd.read_csv("drive/MyDrive/Dataset2/General Result/ratings.csv")
movies = pd.read_csv("drive/MyDrive/Dataset2/General Result/Movies.csv")

In [4]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1096197,No Way Up,"Action,Horror,Thriller"
1,823464,Godzilla x Kong: The New Empire,"Action,Science Fiction,Adventure,Fantasy"
2,934632,Rebel Moon — Part Two: The Scargiver,"Science Fiction,Action,Drama,Adventure"
3,1011985,Kung Fu Panda 4,"Animation,Action,Family,Comedy,Fantasy"
4,693134,Dune: Part Two,"Science Fiction,Adventure"


In [6]:
# Preprocess ratings data
ratings = ratings[['userId', 'movieId', 'rating']]

In [7]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [8]:
# Filter users with more than 200 ratings
x = ratings['userId'].value_counts() > 200
y = x[x].index
ratings = ratings[ratings['userId'].isin(y)]

In [9]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [10]:
# Merge movies and ratings data
movies_ratings = pd.merge(ratings, movies, on="movieId")
movies_ratings.head()

Unnamed: 0,userId,movieId,rating,title,genres
0,1,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance"
1,6,3,5.0,Shadows in Paradise,"Drama,Comedy,Romance"
2,19,3,3.0,Shadows in Paradise,"Drama,Comedy,Romance"
3,42,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance"
4,51,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance"


In [11]:
# Count number of ratings for each movie
number_rating = movies_ratings.groupby('title')['rating'].count().reset_index()
number_rating.head()

Unnamed: 0,title,rating
0,10 Things I Hate About You,7
1,"10,000 BC",3
2,15 Minutes,1
3,16 Blocks,1
4,"20,000 Leagues Under the Sea",29


In [12]:
number_rating.rename(columns={'rating': 'No.Of Rating'}, inplace=True)
number_rating.head()

Unnamed: 0,title,No.Of Rating
0,10 Things I Hate About You,7
1,"10,000 BC",3
2,15 Minutes,1
3,16 Blocks,1
4,"20,000 Leagues Under the Sea",29


In [13]:
# Merge to get final ratings data
final_rating = movies_ratings.merge(number_rating, on='title')
final_rating.head()

Unnamed: 0,userId,movieId,rating,title,genres,No.Of Rating
0,1,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25
1,6,3,5.0,Shadows in Paradise,"Drama,Comedy,Romance",25
2,19,3,3.0,Shadows in Paradise,"Drama,Comedy,Romance",25
3,42,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25
4,51,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25


In [14]:
final_rating = final_rating[final_rating['No.Of Rating'] >= 10]
final_rating.head()

Unnamed: 0,userId,movieId,rating,title,genres,No.Of Rating
0,1,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25
1,6,3,5.0,Shadows in Paradise,"Drama,Comedy,Romance",25
2,19,3,3.0,Shadows in Paradise,"Drama,Comedy,Romance",25
3,42,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25
4,51,3,4.0,Shadows in Paradise,"Drama,Comedy,Romance",25


In [15]:
# Create the user-movie interaction matrix
user_movie_matrix = final_rating.pivot_table(columns='movieId', index='userId', values='rating')
user_movie_matrix.head()

movieId,3,5,6,11,18,19,22,24,25,58,...,8874,8914,8961,8984,33166,49530,51540,89492,93840,107406
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,3.0,...,,,,,,,,,,
6,5.0,5.0,4.0,4.0,,2.0,5.0,4.0,3.0,,...,,,,,,,,,,
18,,,4.0,,,,,,,,...,,,3.5,3.0,,,4.5,,,
19,3.0,,,,,2.0,,,,,...,,,,,,,,,,


In [16]:
user_movie_matrix.fillna(0, inplace=True)
user_movie_matrix.head()

movieId,3,5,6,11,18,19,22,24,25,58,...,8874,8914,8961,8984,33166,49530,51540,89492,93840,107406
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,5.0,5.0,4.0,4.0,0.0,2.0,5.0,4.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.5,3.0,0.0,0.0,4.5,0.0,0.0,0.0
19,3.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Convert to sparse matrix
user_movie_sparse = csr_matrix(user_movie_matrix)

In [18]:
# Fit the collaborative filtering model
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(user_movie_sparse)

In [19]:
# Define the collaborative filtering function
def collaborative_filtering(user_id):
    if user_id not in user_movie_matrix.index:
        print(f"User ID {user_id} not found in user_movie_matrix.")
        return []

    # Get the distances and indices of the nearest neighbors
    distances, suggestions = model.kneighbors(user_movie_matrix.loc[user_id].values.reshape(1, -1), n_neighbors=6)

    # Get the movies this user has interacted with
    user_interactions = user_movie_matrix.loc[user_id].to_numpy().nonzero()[0]
    recommended_movies = []

    # Loop through the suggestions (similar users)
    for i in range(1, len(suggestions[0])):  # Skip the first user (itself)
        similar_user_id = suggestions[0][i]
        similar_user_interactions = user_movie_matrix.iloc[similar_user_id].to_numpy().nonzero()[0]

        # Find recommendations (movies that similar users have rated, but this user has not)
        recommendations = np.setdiff1d(similar_user_interactions, user_interactions)
        recommended_movies.extend(recommendations)

    # Remove duplicates and limit to top 10 recommendations
    recommended_movies = list(set(recommended_movies))[:10]

    if not recommended_movies:
        print(f"No recommendations available for User {user_id}.")
        return []

    return recommended_movies

In [22]:
# Example usage
user_id = int(input('Enter the user id : '))
collaborative_filtering(user_id)

Enter the user id : 4


[0, 1, 2, 3, 4, 6, 7, 8, 10, 11]