In [4]:
# Cell 1: Import everything
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Make the parent directory importable so the `src` package resolves
# (assumes the notebook is run from a sub-directory of the project -- confirm).
# The path is resolved to an absolute one so it does not depend on later
# changes of the working directory, and the membership check keeps the cell
# idempotent: re-running it must not pile up duplicate sys.path entries.
_parent_dir = str(Path('..').resolve())
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from src.data_preprocessing import load_data, filter_sparse_data
from src.collaborative_filtering import UserBasedCollaborativeFilter

In [6]:
# Load the two datasets returned by the project loader: `ratings` and `movies`.
# Later cells read `movieId`/`title` from `movies` and `userId`/`movieId`/
# `rating` from rows derived from the ratings.
ratings, movies = load_data()
# NOTE(review): presumably drops users and/or movies with too few ratings --
# confirm the exact criteria in src.data_preprocessing.filter_sparse_data.
filtered_ratings = filter_sparse_data(ratings)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the filtered ratings for evaluation; the fixed seed keeps the
# split reproducible across kernel restarts.
split_kwargs = {"test_size": 0.2, "random_state": 42}
train_ratings, test_ratings = train_test_split(filtered_ratings, **split_kwargs)

In [11]:
# Neighbourhood size handed to the user-based collaborative filter.
N_NEIGHBORS = 50

# Build the model and fit it on the training portion of the ratings only.
model = UserBasedCollaborativeFilter(n_neighbors=N_NEIGHBORS)
model.fit(train_ratings)

Creating user-item matrix...
Calculating user similarities...
Model training complete!


In [18]:
# User to generate recommendations for, and how many to show. The count is
# kept in one variable so the request and the printed header cannot drift apart.
user_id = 100
n_recommendations = 10
recommendations = model.recommend_movies(user_id, n_recommendations=n_recommendations)

# Build the movieId -> title lookup once instead of scanning the whole `movies`
# frame for every recommendation. Keeping the first row per movieId mirrors
# the original `.iloc[0]` choice when an id appears more than once.
title_by_movie_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

print(f"Top {n_recommendations} recommendations for User {user_id}:")
for movie_id, predicted_rating in recommendations:
    # Fall back to a placeholder rather than raising IndexError when a
    # recommended id has no entry in the movie metadata.
    movie_title = title_by_movie_id.get(movie_id, f"<unknown movieId {movie_id}>")
    print(f"{movie_title}: {predicted_rating:.2f}")

Top 10 recommendations for User 100:
Shawshank Redemption, The (1994): 4.41
Godfather, The (1972): 4.39
Casablanca (1942): 4.37
Godfather: Part II, The (1974): 4.36
Schindler's List (1993): 4.32
Star Wars: Episode V - The Empire Strikes Back (1980): 4.31
Monty Python and the Holy Grail (1975): 4.30
Star Wars: Episode IV - A New Hope (1977): 4.27
Silence of the Lambs, The (1991): 4.27
Spirited Away (Sen to Chihiro no kamikakushi) (2001): 4.27


In [19]:
from sklearn.metrics import mean_squared_error

# Number of held-out ratings to score; a subset keeps the cell fast.
EVAL_SAMPLE_SIZE = 1000

# `test_ratings` comes from a shuffled split, so its first rows already form a
# random sample of the held-out data.
eval_sample = test_ratings.head(EVAL_SAMPLE_SIZE)

# Make predictions on test set.
# Iterate the id columns directly rather than via `iterrows()`: iterrows builds
# one Series per row with a single dtype, so ids are upcast to float whenever
# the rating column is float, and the model would then be queried with
# non-integer ids. Column iteration keeps each column's own dtype.
# NOTE(review): if predict_rating can return NaN for user/movie pairs absent
# from the training data, mean_squared_error will raise -- confirm.
predictions = [
    model.predict_rating(rated_user, rated_movie)
    for rated_user, rated_movie in zip(eval_sample['userId'], eval_sample['movieId'])
]
actuals = eval_sample['rating'].tolist()

rmse = np.sqrt(mean_squared_error(actuals, predictions))
print(f"RMSE: {rmse:.3f}")

RMSE: 0.914
