# Movie Recommendation System using Matrix Factorization (SVD)

In [None]:
# Import required libraries
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse, mae

##  Load and Prepare the Dataset

In [None]:
# Load the built-in MovieLens 100k ratings through Surprise's dataset loader.
data = Dataset.load_builtin(name='ml-100k')

# Hold out 20% of the ratings for evaluation; the seed is fixed so the
# train/test partition is the same on every run.
trainset, testset = train_test_split(
    data,
    random_state=42,
    test_size=0.2,
)

##  Train the SVD Model

In [None]:
# Use SVD for matrix factorization-based collaborative filtering.
# The train/test split is seeded with 42, but an unseeded SVD still starts from
# different random factors each run; seed it too so the trained model and the
# metrics computed from it are repeatable.
model = SVD(random_state=42)
model.fit(trainset)

##  Evaluate the Model

In [None]:
# Evaluate the model on the test set
predictions = model.test(testset)

# Evaluation Metrics
# rmse() and mae() print the score themselves unless verbose=False, which made
# every metric appear twice; silence them so only the prints below report it.
print("📊 Evaluation Metrics")
print("RMSE:", rmse(predictions, verbose=False))
print("MAE:", mae(predictions, verbose=False))

##  Generate Top-N Recommendations for Users

In [None]:
# Build the top-N highest-estimated items for every user found in `predictions`
def get_top_n(predictions, n=5):
    """Return each user's ``n`` items with the highest estimated rating.

    Args:
        predictions: Iterable of 5-item records that unpack as
            ``(uid, iid, true_r, est, details)``; only ``uid``, ``iid`` and
            ``est`` are used.
        n: Maximum number of items to keep per user. Must be non-negative.

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(iid, est)`` pairs ordered by ``est`` descending. Items with equal
        estimates keep their input order. Users appear in first-seen order.

    Raises:
        ValueError: If ``n`` is negative. A negative value used to be applied
            as a slice bound and silently dropped the lowest-rated items
            instead of selecting the best ones.
    """
    from collections import defaultdict
    from heapq import nlargest
    from operator import itemgetter

    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # Group (item, estimate) pairs under the user they were predicted for.
    per_user = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        per_user[uid].append((iid, est))

    # nlargest is documented as equivalent to
    # sorted(..., key=key, reverse=True)[:n] but avoids fully sorting each list.
    by_estimate = itemgetter(1)
    for uid, rated in per_user.items():
        # Rebinding an existing key does not resize the dict, so this is safe
        # while iterating over it.
        per_user[uid] = nlargest(n, rated, key=by_estimate)
    return per_user

# Rank each user's test-set predictions and keep the five best per user.
# NOTE(review): the candidates are (user, movie) pairs from the held-out test
# ratings, i.e. movies the user has already rated rather than unseen titles.
top_n = get_top_n(predictions, n=5)

# Preview the ranked lists of the first three users in the mapping.
print("\n🎯 Top 5 Recommendations for 3 users:")
preview_users = list(top_n)[:3]
for uid in preview_users:
    print(f"\nUser {uid}:")
    for iid, rating in top_n[uid]:
        print(f"  Movie ID: {iid} | Predicted Rating: {rating:.2f}")

## Visualize Error Distribution

In [None]:
import matplotlib.pyplot as plt

# Absolute gap between the true rating and the model's estimate for every
# entry in `predictions`.
errors = [abs(actual - estimated) for (_, _, actual, estimated, _) in predictions]

# Histogram of those gaps: 30 bins, bars outlined in black.
plt.hist(errors, bins=30, edgecolor='black')
plt.xlabel("Absolute Error")
plt.ylabel("Frequency")
plt.title("Prediction Error Distribution")
plt.grid(True)
plt.show()

## Recommend with Movie Titles

In [None]:
# Read only the first two columns (movie id and title) of the pipe-separated,
# header-less u.item file.
item_df = pd.read_csv(
    'https://files.grouplens.org/datasets/movielens/ml-100k/u.item',
    sep='|',
    header=None,
    usecols=[0, 1],
    names=['movie_id', 'title'],
    encoding='latin-1',
)

# Key the lookup by the id rendered as a string so it can be queried with the
# item ids stored in top_n (presumably strings; confirm against the dataset).
movie_map = {
    movie_id: title
    for movie_id, title in zip(item_df['movie_id'].astype(str), item_df['title'])
}

# Show recommendations with titles for the first user in top_n
uid = list(top_n)[0]
print(f"\n🎬 Recommendations for user {uid}:")
for iid, rating in top_n[uid]:
    # Ids missing from the lookup are shown as 'Unknown'.
    title = movie_map.get(iid, 'Unknown')
    print(f"{title} - Predicted Rating: {rating:.2f}")