In [1]:
# ================================
# Task 4: Movie Recommendation System
# Collaborative Filtering (SVD)
# ================================

# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate

# Step 2: Load Dataset
# CSV path: Task4/task4_movies.csv
# The columns 'userId', 'movieId' and 'rating' are read from this file below.
df = pd.read_csv('Task4/task4_movies.csv')
print("Dataset preview:")
print(df.head())

# Plot ratings distribution
plt.figure(figsize=(6,4))
sns.countplot(x='rating', data=df, palette="Set2")
plt.title("Rating Distribution")
plt.show()

# Step 3: Prepare data for Surprise
# NOTE(review): rating_scale is hard-coded to (1, 5); confirm the CSV has no
# ratings outside that range (e.g. half-star ratings starting at 0.5).
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(df[['userId','movieId','rating']], reader)

# Step 4: Train-Test Split
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Step 5: Build SVD model
# Hyperparameters are shared by the hold-out model and the cross-validation
# model so both evaluations describe the same configuration.
svd_params = dict(n_factors=50, n_epochs=20, random_state=42)
svd_model = SVD(**svd_params)
svd_model.fit(trainset)

# Step 6: Evaluate model using cross-validation
# cross_validate() calls fit() on the estimator it receives for every fold.
# Passing svd_model here would overwrite the fit from Step 5 with a fit on
# the last fold, whose training data overlaps `testset`; the predictions in
# Step 7 would then be made by a model that has seen the test ratings.
# A separate instance keeps svd_model trained on `trainset` only.
cv_model = SVD(**svd_params)
cv_results = cross_validate(cv_model, data, measures=['RMSE','MAE'], cv=5, verbose=True)
print("\nAverage RMSE:", np.mean(cv_results['test_rmse']))
print("Average MAE:", np.mean(cv_results['test_mae']))

# Step 7: Predict ratings on testset
# svd_model is still the model fitted on `trainset`, so `testset` is unseen.
predictions = svd_model.test(testset)

# Step 8: Top-N Recommendations for each user
from collections import defaultdict

def get_top_n(predictions, n=3):
    """Group predictions by user and keep each user's n highest estimates.

    Args:
        predictions: Iterable of 5-item records, unpacked as
            (user id, item id, true rating, estimated rating, details).
        n: Maximum number of (item id, estimate) pairs kept per user.

    Returns:
        A ``defaultdict(list)`` mapping each user id to its pairs, ordered
        by estimated rating from highest to lowest. Pairs with equal
        estimates keep the order in which they appeared in the input.
    """
    per_user = defaultdict(list)
    for prediction in predictions:
        user, item, _true_rating, estimate, _details = prediction
        per_user[user].append((item, estimate))

    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]
    return per_user

# Build the per-user recommendation lists from the test-set predictions.
top_n_recommendations = get_top_n(predictions, n=3)

# Display Top-N recommendations: one line per user, listing only the item
# ids (the estimated ratings are dropped from the output).
print("\nTop-3 movie recommendations for users:")
for user_id, user_ratings in top_n_recommendations.items():
    recommended_ids = [pair[0] for pair in user_ratings]
    print(f"User {user_id}: {recommended_ids}")


ModuleNotFoundError: No module named 'pandas'