In [26]:
# ============================================
# DEBUG: inspect the raw prediction objects
# ============================================

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "DEBUG: Checking prediction objects", "=" * 50, sep="\n")

# (label, fitted model) pairs probed for every sampled interaction.
models_to_probe = (
    ('TopPop', top_pop_model),
    ('KNN', knn_model),
    ('SVD', svd_model),
)

# Only the first five test interactions are inspected.
for i, (user, item, true_rating) in enumerate(test_tuples[:5]):
    # Ids are truncated to their first 10 characters to keep the line short.
    print(f"\nTest {i+1}: User={user[:10]}..., Item={item[:10]}...")

    for model_name, model in models_to_probe:
        try:
            pred = model.predict(user, item)
            # Show the whole prediction object, then the estimate and its type.
            print(f"  {model_name}:")
            print(f"    prediction object: {pred}")
            print(f"    estimated rating: {pred.est}")
            print(f"    type of est: {type(pred.est)}")
        except Exception as e:
            # Keep probing the remaining models even if this one fails.
            print(f"  {model_name}: Error - {e}")


DEBUG: Checking prediction objects

Test 1: User=AFKKKKYLVY..., Item=B00Z9TIGCG...
  TopPop:
    prediction object: user: AFKKKKYLVYHA2FELEWZGVEW3TXHA item: B00Z9TIGCG r_ui = None   est = 1.00   {'was_impossible': False}
    estimated rating: 1
    type of est: <class 'int'>
  KNN:
    prediction object: user: AFKKKKYLVYHA2FELEWZGVEW3TXHA item: B00Z9TIGCG r_ui = None   est = 4.63   {'actual_k': 25, 'was_impossible': False}
    estimated rating: 4.633329656883946
    type of est: <class 'numpy.float64'>
  SVD:
    prediction object: user: AFKKKKYLVYHA2FELEWZGVEW3TXHA item: B00Z9TIGCG r_ui = None   est = 4.32   {'was_impossible': False}
    estimated rating: 4.317202228146999
    type of est: <class 'numpy.float64'>

Test 2: User=AEPERIK4W4..., Item=B0088TN7BO...
  TopPop:
    prediction object: user: AEPERIK4W4CBNSELKZTBOQ6F63RA item: B0088TN7BO r_ui = None   est = 1.00   {'was_impossible': False}
    estimated rating: 1
    type of est: <class 'int'>
  KNN:
    prediction object: user

In [27]:
# ============================================
# STEP 3: RMSE evaluation (fixed version)
# ============================================

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "STEP 3: RMSE Evaluation", "=" * 50, sep="\n")

def calculate_rmse_fixed(model, test_tuples, model_name=""):
    """Compute the RMSE of ``model`` over ``(user, item, true_rating)`` triples.

    Every triple is scored with ``model.predict(user, item)`` and the ``est``
    attribute of the returned object is compared with the true rating.

    A triple is counted as an error, and left out of the RMSE, when:

    * the prediction call raises,
    * the true rating or the estimate cannot be converted to ``float``
      (for example ``None``), or
    * either value is NaN or infinite.

    Skipping those triples keeps a single bad row from turning the whole
    metric into NaN.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(user, item)`` whose result has an
        ``est`` attribute.
    test_tuples : iterable of (user, item, true_rating)
        Held-out interactions to score.
    model_name : str, optional
        Label used in the printed summary line.

    Returns
    -------
    float
        RMSE over the valid pairs, or NaN when there is no valid pair.
    """
    true_values = []
    estimates = []
    errors = 0
    first_failure = None  # remembered so failures are not silently discarded

    for user, item, true_rating in test_tuples:
        try:
            pred = model.predict(user, item)
            truth = float(true_rating)
            estimate = float(pred.est)
        except Exception as exc:
            # Best-effort evaluation: one failing row must not abort the run,
            # but keep the first exception so it can be reported below.
            errors += 1
            if first_failure is None:
                first_failure = exc
            continue

        # NaN / +-inf on either side would poison the mean, so drop the pair.
        if np.isfinite(truth) and np.isfinite(estimate):
            true_values.append(truth)
            estimates.append(estimate)
        else:
            errors += 1

    print(f"  {model_name}: {len(estimates)} valid predictions, {errors} errors")
    if first_failure is not None:
        print(f"  {model_name}: first error was {type(first_failure).__name__}: {first_failure}")

    if not estimates:
        return float('nan')

    # RMSE = sqrt(mean((true - est)^2)), computed on plain float arrays.
    residuals = np.asarray(true_values, dtype=float) - np.asarray(estimates, dtype=float)
    return float(np.sqrt(np.mean(residuals ** 2)))

# Score every model on the same held-out interactions
print("\nCalculating RMSE with fixed function...")
top_pop_rmse = calculate_rmse_fixed(model=top_pop_model, test_tuples=test_tuples, model_name="TopPop")
knn_rmse = calculate_rmse_fixed(model=knn_model, test_tuples=test_tuples, model_name="KNN")
svd_rmse = calculate_rmse_fixed(model=svd_model, test_tuples=test_tuples, model_name="SVD")

# Report: header block first, then one line per model (4 decimal places)
print("\n" + "-" * 30, "RMSE RESULTS", "-" * 30, sep="\n")
print(f"TopPop RMSE: {top_pop_rmse:.4f}")
print(f"KNN RMSE: {knn_rmse:.4f}")
print(f"SVD RMSE: {svd_rmse:.4f}")


STEP 3: RMSE Evaluation

Calculating RMSE with fixed function...
  TopPop: 6645 valid predictions, 0 errors
  KNN: 6645 valid predictions, 0 errors
  SVD: 6645 valid predictions, 0 errors

------------------------------
RMSE RESULTS
------------------------------
TopPop RMSE: 3.3354
KNN RMSE: 0.9789
SVD RMSE: 0.9333


In [23]:
# ============================================
# STEP 2: Prepare test data
# ============================================

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "STEP 2: Preparing test data", "=" * 50, sep="\n")

# One plain (user_id, item_id, rating) tuple per row of the test frame
test_tuples = list(map(tuple, test_df[['user_id', 'item_id', 'rating']].to_numpy()))
print(f"Prepared {len(test_tuples)} test interactions")

# Distinct users that appear in the test frame
test_users = test_df['user_id'].unique()
print(f"Found {len(test_users)} unique test users")



STEP 2: Preparing test data
Prepared 6645 test interactions
Found 1389 unique test users


In [22]:
# ============================================
# STEP 1: Load saved models and data
# ============================================

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "STEP 1: Loading saved models and data", "=" * 50, sep="\n")


def _unpickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Cleaned train / test interaction frames
train_df = pd.read_parquet('cleaned_train.parquet')
test_df = pd.read_parquet('cleaned_test.parquet')

# Previously saved models and the trainset object
knn_model = _unpickle('best_knn_model.pkl')
svd_model = _unpickle('best_svd_model.pkl')
trainset = _unpickle('trainset.pkl')

# TopPop is rebuilt here from the first 10 rows of top_items.csv
print("\nRecreating TopPop model...")
top_items = pd.read_csv('top_items.csv')
top_pop_items = top_items['item_id'].head(10).tolist()
top_pop_model = TopPop(top_pop_items)
top_pop_model.fit(trainset)

print(f"TopPop model recreated with {len(top_pop_items)} popular items")
print(f"Top items: {top_pop_items[:5]}...")

# Summary of what was loaded
print("\nModels and data loaded successfully!")
print(f"Train set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")


STEP 1: Loading saved models and data

Recreating TopPop model...
TopPop model recreated with 10 popular items
Top items: ['B0086VPUHI', 'B00BN5T30E', 'B07YBXFDYN', 'B00BGA9WK2', 'B007CM0K86']...

Models and data loaded successfully!
Train set size: 26580
Test set size: 6645


In [21]:
# ============================================
# DEFINE THE TOPPOP CLASS
# ============================================

class TopPop(AlgoBase):
    """Popularity baseline built on Surprise's ``AlgoBase``.

    An item whose raw id is in ``top_items`` is estimated at 5.0; every other
    item, including items the trainset does not know, is estimated at 1.0.
    NOTE(review): 5.0 / 1.0 presumably match the ends of the rating scale;
    confirm against the trainset's ``rating_scale``.

    Parameters
    ----------
    top_items_list : list, optional
        Raw item ids treated as "popular". ``None`` (or any falsy value)
        results in an empty list.
    """

    def __init__(self, top_items_list=None):
        AlgoBase.__init__(self)
        self.top_items = top_items_list if top_items_list else []

    def fit(self, trainset):
        """Register ``trainset`` with the algorithm and return ``self``.

        Nothing is learned here; the trainset is only needed later to map
        inner item ids back to raw ids.
        """
        # Delegate to the base class, as Surprise documents for derived
        # algorithms; it stores the trainset on ``self.trainset``.
        AlgoBase.fit(self, trainset)
        return self

    def estimate(self, u, i):
        """Return 5.0 when inner item id ``i`` maps to a top item, else 1.0.

        ``u`` (the inner user id) is not used: the estimate depends on the
        item only.
        """
        try:
            raw_iid = self.trainset.to_raw_iid(i)
        except ValueError:
            # ``to_raw_iid`` raises ValueError for an inner id the trainset
            # does not know. Such items get the lowest score. Any other
            # exception is a real bug and is allowed to propagate.
            return 1.0
        return 5.0 if raw_iid in self.top_items else 1.0

# Notebook banner: rule, title, rule.
print("=" * 50, "WEEK 8: Evaluating Recommender Systems", "=" * 50, sep="\n")


WEEK 8: Evaluating Recommender Systems


In [20]:
# ============================================
# WEEK 8: Evaluation of Recommender Systems
# ============================================

import pandas as pd
import numpy as np
import pickle
from surprise import AlgoBase, accuracy
import warnings
warnings.filterwarnings('ignore')