In [1]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from scipy.sparse import load_npz, save_npz  # ✅ Added save_npz here
import pickle
import importlib

# Force reload the module to get latest changes
import src.models.collaborative_filtering as cf_module
importlib.reload(cf_module)
from src.models.collaborative_filtering import CollaborativeFiltering

def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling runs arbitrary code from the file, so this must only
    # be pointed at locally produced, trusted artifacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Read the preprocessed tables and the sparse user-item matrix from ../data.
print("Loading processed data...")
movies_processed = pd.read_csv('../data/movies_processed.csv')
ratings_processed = pd.read_csv('../data/ratings_processed.csv')
user_item_matrix = load_npz('../data/user_item_matrix_sparse.npz')

# Restore the encoders that translate between raw user/movie IDs and the
# integer indices used by the model (see their use in the test function).
user_encoder = _read_pickle('../data/user_encoder.pkl')
movie_encoder = _read_pickle('../data/movie_encoder.pkl')

print("Data loaded successfully!")
print(f"User-item matrix shape: {user_item_matrix.shape}")

# Wrap the sparse user-item matrix in the project's CollaborativeFiltering
# model; all three training steps below are methods of this one object.
print("\nInitializing Collaborative Filtering Model...")
cf_model = CollaborativeFiltering(user_item_matrix)

# Step 1: approximate user-user similarity. The returned object is kept so
# it can be pickled later in the script.
# NOTE(review): n_neighbors=50 presumably bounds the neighbours considered
# per user -- confirm against CollaborativeFiltering.approximate_user_similarity.
print("\n=== Training APPROXIMATE User-Based Collaborative Filtering ===")
print("Using KNN for memory-efficient similarity...")
knn_model = cf_model.approximate_user_similarity(n_neighbors=50)

# Step 2: item-item similarity computed on a sample of items. The call
# returns a pair: the similarity structure and the indices of the sampled
# items. item_similarity is later written with save_npz, so it is presumably
# a SciPy sparse matrix -- TODO confirm.
print("\n=== Training Item-Based Collaborative Filtering (Sampled) ===")
item_similarity, sample_indices = cf_model.item_based_similarity_optimized(sample_size=2000)

# Step 3: matrix factorization with 100 components. The script later pickles
# cf_model.svd_model, so this call presumably stores the fitted SVD on the
# model as a side effect -- verify in the class.
print("\n=== Training Matrix Factorization ===")
factorized_matrix = cf_model.matrix_factorization(n_components=100)

# Persist every trained artifact under ../models so later runs can reuse
# them without retraining.
print("\nSaving trained models...")
import os
os.makedirs('../models', exist_ok=True)


def _write_pickle(obj, path):
    """Pickle *obj* into the file at *path*, replacing any existing file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


# Approximate user-similarity (KNN) model.
_write_pickle(knn_model, '../models/knn_model.pkl')

# Sampled item-item similarity, stored together with the indices of the
# sampled items so the similarity entries can be mapped back to items.
np.save('../models/item_similarity_sample_indices.npy', sample_indices)
save_npz('../models/item_similarity_sample.npz', item_similarity)

# Output of the matrix-factorization step.
np.save('../models/factorized_matrix.npy', factorized_matrix)

# Fitted SVD object held on the collaborative-filtering model.
_write_pickle(cf_model.svd_model, '../models/svd_model.pkl')

print("All models saved successfully!")

# Test recommendations with memory-efficient methods
def _print_recommendations(movie_indices, scores):
    """Print one numbered ``title (Score: x.xxx)`` line per recommendation.

    Args:
        movie_indices: Encoded movie indices as returned by the model.
        scores: Scores aligned position-by-position with ``movie_indices``.
    """
    for rank, (movie_idx, score) in enumerate(zip(movie_indices, scores), start=1):
        # Decode the model's movie index back to the movieId used in the
        # metadata table.
        movie_id = movie_encoder.inverse_transform([movie_idx])[0]
        movie_info = movies_processed[movies_processed['movieId'] == movie_id]
        # Movies without a metadata row are skipped; their rank number is
        # skipped as well, so the printed numbering may have gaps.
        if not movie_info.empty:
            title = movie_info['title'].values[0]
            print(f"   {rank}. {title} (Score: {score:.3f})")


def test_memory_efficient_recommendations(user_id=1, n_recommendations=5):
    """Print KNN, SVD and hybrid recommendations for a single user.

    Args:
        user_id: Raw user ID, translated to a model index with
            ``user_encoder``. If the translation fails, a notice is printed
            and the user at index 0 is used instead.
        n_recommendations: Number of recommendations requested from each
            of the three recommenders.

    Returns:
        None. All results are written to stdout.
    """
    try:
        user_idx = user_encoder.transform([user_id])[0]
    except Exception:
        # Deliberate best-effort fallback. ``Exception`` (rather than a bare
        # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        # NOTE(review): if user_encoder is a scikit-learn LabelEncoder the
        # unseen-ID error is ValueError -- narrow further once confirmed.
        print(f"User ID {user_id} not found. Using first user.")
        user_idx = 0

    print(f"\n=== Testing Memory-Efficient Recommendations for User {user_id} ===")

    # Test KNN-based recommendations
    print("\n1. KNN-Based Recommendations:")
    knn_indices, knn_scores = cf_model.get_user_recommendations_knn(user_idx, n_recommendations)
    _print_recommendations(knn_indices, knn_scores)

    # Test SVD-based recommendations
    print("\n2. SVD-Based Recommendations:")
    svd_indices, svd_scores = cf_model.get_svd_recommendations(user_idx, n_recommendations)
    _print_recommendations(svd_indices, svd_scores)

    # Test Hybrid recommendations
    print("\n3. Hybrid Recommendations (KNN + SVD):")
    hybrid_indices, hybrid_scores = cf_model.get_hybrid_recommendations(user_idx, n_recommendations)
    _print_recommendations(hybrid_indices, hybrid_scores)

# Smoke-test the recommenders for raw user ID 1, using the function's
# default of 5 recommendations per method.
test_memory_efficient_recommendations(user_id=1)

Loading processed data...
Data loaded successfully!
User-item matrix shape: (610, 2269)

Initializing Collaborative Filtering Model...

=== Training APPROXIMATE User-Based Collaborative Filtering ===
Using KNN for memory-efficient similarity...
Calculating approximate user-user similarity using KNN...
KNN model fitted in 0.00 seconds

=== Training Item-Based Collaborative Filtering (Sampled) ===
Calculating item-item similarity on sample...
Item similarity calculated on 2000 items in 0.12 seconds

=== Training Matrix Factorization ===
Applying Matrix Factorization with SVD...
Explained variance ratio: 0.6906
Factorized matrix shape: (610, 100)
SVD completed in 0.15 seconds

Saving trained models...
All models saved successfully!

=== Testing Memory-Efficient Recommendations for User 1 ===

1. KNN-Based Recommendations:
   1. Terminator 2: Judgment Day (1991) (Score: 4.342)
   2. Aliens (1986) (Score: 4.237)
   3. Sixth Sense, The (1999) (Score: 3.895)
   4. Hunt for Red October, The (1