### Imports

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
import seaborn as sns


### Load and Preprocess Data

In [2]:
# Load datasets
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movie.csv")

# Merge datasets
df = pd.merge(ratings, movies, on="movieId")

# Drop unnecessary columns
df.drop(["timestamp", "genres"], axis=1, inplace=True)




### Create Sparse Matrix for Collaborative Filtering

In [3]:
# Create a user-movie sparse matrix directly (userId and movieId must start at 0 for CSR)
from scipy.sparse import csr_matrix

# Reindex userId and movieId to avoid large sparse dimensions
df['user_idx'] = df['userId'].astype("category").cat.codes
df['movie_idx'] = df['movieId'].astype("category").cat.codes

# Store mapping to recover later
user_id_map = dict(enumerate(df['userId'].astype("category").cat.categories))
movie_id_map = dict(enumerate(df['movieId'].astype("category").cat.categories))

# Create sparse matrix
sparse_matrix = csr_matrix((df['rating'], (df['user_idx'], df['movie_idx'])))


In [4]:
from sklearn.neighbors import NearestNeighbors

# Fit KNN
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=11, n_jobs=-1)
knn.fit(sparse_matrix)

# KNN Recommendation Function
def recommend_knn(user_id, n_recommendations=10):
    if user_id not in df['userId'].values:
        return "User not found."

    user_idx = df[df['userId'] == user_id]['user_idx'].iloc[0]
    distances, indices = knn.kneighbors(sparse_matrix[user_idx], n_neighbors=n_recommendations + 1)
    
    similar_users = indices.flatten()[1:]
    similar_user_ids = [user_id_map[idx] for idx in similar_users]

    similar_ratings = df[df['userId'].isin(similar_user_ids)]
    top_movies = similar_ratings.groupby('movieId')['rating'].mean().nlargest(n_recommendations).index

    return movies[movies['movieId'].isin(top_movies)][['movieId', 'title']]


### Example for KNN

In [5]:
user_id = 19
print(f"KNN Recommendations for User {user_id}:")
print(recommend_knn(user_id))


KNN Recommendations for User 19:
       movieId                                              title
121        123      Chungking Express (Chung Hing sam lam) (1994)
5519      5618  Spirited Away (Sen to Chihiro no kamikakushi) ...
5591      5690     Grave of the Fireflies (Hotaru no haka) (1988)
9304     27368  Asterix & Obelix: Mission Cleopatra (Astérix &...
10269    34323                        Devil's Rejects, The (2005)
12502    58376                        Zeitgeist: The Movie (2007)
13296    65225                         Zeitgeist: Addendum (2008)
16667    84273                   Zeitgeist: Moving Forward (2011)
17501    88129                                       Drive (2011)
20295    99764                   It's Such a Beautiful Day (2012)


### Matrix Factorization using SVD

In [6]:
from sklearn.decomposition import TruncatedSVD

# Apply Truncated SVD
svd = TruncatedSVD(n_components=20, random_state=42)
svd_matrix = svd.fit_transform(sparse_matrix)

# SVD Recommendation Function
def recommend_svd(user_id, n_recommendations=10):
    if user_id not in df['userId'].values:
        return "User not found."

    user_idx = df[df['userId'] == user_id]['user_idx'].iloc[0]
    user_vector = svd_matrix[user_idx]

    scores = svd_matrix @ user_vector
    similar_indices = scores.argsort()[::-1]

    # Avoid already rated movies
    seen_movies = df[df['user_idx'] == user_idx]['movie_idx'].values
    recommended = [idx for idx in similar_indices if idx not in seen_movies][:n_recommendations]

    movie_ids = [movie_id_map.get(i) for i in recommended if movie_id_map.get(i) is not None]

    return movies[movies['movieId'].isin(movie_ids)][['movieId', 'title']]


### Example for SVD

In [7]:
user_id = 19

print("\nSVD Recommendations:")
print(recommend_svd(user_id))



SVD Recommendations:
       movieId                                              title
21446   104091  Devil's Nightmare, The (Plus longue nuit du di...
21679   104971  Imaginary Witness: Hollywood and the Holocaust...
