In [1]:
# 1. Imports & Data Load

import os
import sys

# Put the parent of the current working directory on the import path so that
# `src` can be imported below. The membership guard keeps re-runs of this cell
# from stacking duplicate entries onto sys.path.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data_loader import load_data

# load_data() hands back three objects; `movies` and `df` are the ones the
# later cells read (title lookup and the userId/movieId/rating table).
ratings, movies, df = load_data()

In [2]:
# 2. Convert Data to Surprise Format
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split

# Surprise is told the rating bounds explicitly: 0.5 (lowest) to 5.0 (highest).
# Only the user / item / rating columns are handed over, in that order.
rating_columns = ['userId', 'movieId', 'rating']
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df[rating_columns], reader)

# Hold out 20% of the ratings for evaluation; the fixed random_state keeps
# the split the same from run to run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [3]:
# 3. Train SVD Model
from surprise import SVD
from surprise import accuracy

# Initialize and train model.
# SVD draws its initial factors from an RNG; pinning random_state makes the
# fitted model -- and therefore the RMSE and every ranking derived from it --
# repeatable under Restart & Run All.
model = SVD(random_state=42)
model.fit(trainset)

# Predict on the held-out test set
predictions = model.test(testset)

# Evaluate (accuracy.rmse also prints the score)
rmse = accuracy.rmse(predictions)

RMSE: 0.8671


In [5]:
# 4. Recommend Movies for a Specific User
import pandas as pd

user_id = 1
TOP_N = 10  # how many recommendations to keep

# Get all movie IDs the user hasn't rated.
# Membership is tested against a set: checking each candidate against the
# plain list rescanned it for every movie, which is needlessly quadratic.
user_rated = df.loc[df['userId'] == user_id, 'movieId'].tolist()
user_rated_lookup = set(user_rated)
all_movies = df['movieId'].unique()
unrated_movies = [m for m in all_movies if m not in user_rated_lookup]

# Predict ratings for all unrated movies
preds = [model.predict(user_id, mid) for mid in unrated_movies]

# Sort by predicted rating (highest first) and keep the best TOP_N
top_preds = sorted(preds, key=lambda x: x.est, reverse=True)[:TOP_N]

# Map back to titles
top_movie_ids = [p.iid for p in top_preds]
top_movie_titles = movies.loc[movies['movieId'].isin(top_movie_ids), ['movieId', 'title']]

# Merge predictions with titles. The predictions frame is the left side of
# the (default, inner) merge, so the ranked order built above is kept.
recommendations = pd.DataFrame({
    'movieId': top_movie_ids,
    'predicted_rating': [p.est for p in top_preds]
}).merge(top_movie_titles, on='movieId')[['title', 'predicted_rating']]

recommendations


Unnamed: 0,title,predicted_rating
0,Requiem for a Dream (2000),4.3127
1,Touch of Evil (1958),4.299359
2,Chinatown (1974),4.274282
3,"Avengers, The (2012)",4.202444
4,This Is Spinal Tap (1984),4.188549
5,"Deer Hunter, The (1978)",4.179844
6,Princess Mononoke (Mononoke-hime) (1997),4.171366
7,M (1931),4.160939
8,Ran (1985),4.155887
9,Citizen Kane (1941),4.154462


In [6]:
# 5. Evaluate Precision@K and Recall@K
from src.evaluation import precision_recall_at_k

# Score the test-set predictions with k=10 and threshold=3.5. The helper
# returns two mappings whose values are averaged below.
# NOTE(review): presumably keyed by user, with `threshold` as the relevance
# cutoff -- confirm against src.evaluation.
precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=3.5)

# Unweighted mean over every entry of each mapping
avg_precision = sum(precisions.values()) / len(precisions)
avg_recall = sum(recalls.values()) / len(recalls)

print(f"Average Precision@10: {avg_precision:.4f}")
print(f"Average Recall@10: {avg_recall:.4f}")

Average Precision@10: 0.7368
Average Recall@10: 0.5319
