# 🎯 Movie Recommendation System using SVD (Collaborative Filtering)

This notebook builds a recommendation system using Singular Value Decomposition (SVD) based collaborative filtering technique.

In [2]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate, train_test_split
import gc
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD

import warnings
warnings.filterwarnings("ignore")

#### Load Data

In [4]:
# Loading Filtered Data in Streaming Mode

def load_filtered_data(file_path, chunk_size=5000000, min_user_ratings=20, min_movie_ratings=20):
    user_counts = {}
    movie_counts = {}
    
    # First pass: count users and movies
    for chunk in pd.read_csv(file_path, usecols=['Cust_Id', 'Name', 'Rating'], chunksize=chunk_size):
        user_counts.update(chunk['Cust_Id'].value_counts().to_dict())
        movie_counts.update(chunk['Name'].value_counts().to_dict())
    
    # Find active users and popular movies
    active_users = {user for user, count in user_counts.items() if count >= min_user_ratings}
    popular_movies = {movie for movie, count in movie_counts.items() if count >= min_movie_ratings}
    
    print(f"Active users found: {len(active_users)}")
    print(f"Popular movies found: {len(popular_movies)}")
    
    # Second pass: load only filtered data
    filtered_chunks = []
    for chunk in pd.read_csv(file_path, usecols=['Cust_Id', 'Name', 'Rating'], chunksize=chunk_size):
        chunk = chunk[chunk['Cust_Id'].isin(active_users) & chunk['Name'].isin(popular_movies)]
        filtered_chunks.append(chunk)
    
    data = pd.concat(filtered_chunks, ignore_index=True)
    print(f"Filtered dataset shape: {data.shape}")
    return data

#### User Item Matrix

In [6]:
def build_user_item_matrix(data):
    user_mapper = {cust_id: idx for idx, cust_id in enumerate(data['Cust_Id'].unique())}
    movie_mapper = {movie: idx for idx, movie in enumerate(data['Name'].unique())}
    
    user_index = data['Cust_Id'].map(user_mapper)
    movie_index = data['Name'].map(movie_mapper)
    
    matrix = csr_matrix((data['Rating'], (user_index, movie_index)))
    print(f"User-Item matrix shape: {matrix.shape}")
    
    return matrix, user_mapper, movie_mapper

#### Train the SVD Model

In [8]:
def train_svd(matrix, n_components=50):
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    matrix_reduced = svd.fit_transform(matrix)
    print(f"SVD reduced matrix shape: {matrix_reduced.shape}")
    return svd, matrix_reduced

In [9]:
def recommend_movies(user_id, matrix, user_mapper, movie_mapper, svd_model, top_n=5):
    # Map user id to row index
    if user_id not in user_mapper:
        print("User not found.")
        return []
    
    user_idx = user_mapper[user_id]
    approx_ratings = svd_model.inverse_transform(svd_model.transform(matrix))
    user_ratings = approx_ratings[user_idx]
    rated_movies = matrix[user_idx].nonzero()[1]
    user_ratings[rated_movies] = -np.inf
    top_indices = np.argsort(user_ratings)[-top_n:][::-1]
    reverse_movie_mapper = {idx: movie for movie, idx in movie_mapper.items()}
    recommended_movies = [reverse_movie_mapper[idx] for idx in top_indices]
    return recommended_movies

#### Evaluate Model Performance

In [11]:
file_path = '../data/Full_Data.csv'
filtered_data = load_filtered_data(file_path)

Active users found: 5374
Popular movies found: 17283
Filtered dataset shape: (3343565, 3)


In [13]:
matrix, user_mapper, movie_mapper = build_user_item_matrix(filtered_data)

svd_model, reduced_matrix = train_svd(matrix)

User-Item matrix shape: (5374, 17280)
SVD reduced matrix shape: (5374, 50)


#### Test

In [43]:
user_id = 1673185
recommendations = recommend_movies(user_id, matrix, user_mapper, movie_mapper, svd_model, top_n=5)

print("Recommended Movies:", recommendations)

Recommended Movies: ['Dr. Quinn', 'Upstairs', 'Henry V', 'Bad Company', 'Richard III']
