In [10]:
import pandas as pd
# Read the two MovieLens "latest-small" tables used throughout the notebook:
# movie metadata and the per-user ratings.
movies = pd.read_csv('data/ml-latest-small/movies.csv')
ratings = pd.read_csv('data/ml-latest-small/ratings.csv')

In [11]:
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Reshape the long ratings table into a users x movies grid.
# NOTE: fill_value=0 stores "not rated" as a rating of 0, so the factorization
# below is fitted to those zeros exactly as if they were real ratings.
user_item_matrix = ratings.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
    fill_value=0,
)
print(f"User-item matrix shape: {user_item_matrix.shape}")

# Factorize the grid into 10 non-negative latent components
# (fixed seed so repeated runs give the same factors).
nmf = NMF(random_state=42, n_components=10)
user_factors = nmf.fit_transform(user_item_matrix)  # one row per user
item_factors = nmf.components_                      # one column per movie

print("NMF model trained successfully!")
print(f"User factors shape: {user_factors.shape}")
print(f"Item factors shape: {item_factors.shape}")

User-item matrix shape: (3, 20)
NMF model trained successfully!
User factors shape: (3, 10)
Item factors shape: (10, 20)


In [12]:
def recommend_movies(user_id, N=10, model_data=None):
    """
    Generate movie recommendations for a given user using NMF model.

    Each movie column of the user-item matrix for which the user has no row in
    the ratings table is scored with the dot product of the user's factor
    vector and the item factors; the highest-scoring movies are returned.

    Args:
        user_id (int): ID of the user to recommend movies for
        N (int): Maximum number of recommendations to return. Zero or a
            negative value yields an empty list; None returns every candidate.
        model_data (dict, optional): Mapping with the keys
            'user_item_matrix', 'user_factors', 'item_factors', 'ratings' and
            'movies'. When omitted, the notebook-level variables of the same
            names are used.

    Returns:
        list: List of tuples (movie_id, title) for recommended movies, best
        first. Empty when the user is unknown, the user has rated every
        movie, or an error occurred (a message is printed in those cases).
    """
    try:
        # Resolve the data sources: explicit bundle first, notebook globals otherwise.
        if model_data is None:
            matrix, u_factors, i_factors = user_item_matrix, user_factors, item_factors
            ratings_df, movies_df = ratings, movies
        else:
            matrix = model_data['user_item_matrix']
            u_factors = model_data['user_factors']
            i_factors = model_data['item_factors']
            ratings_df = model_data['ratings']
            movies_df = model_data['movies']

        # Explicit membership test: a blanket `except KeyError` would also
        # relabel unrelated lookup failures (e.g. a missing column) as an
        # unknown user.
        if user_id not in matrix.index:
            print(f"User {user_id} not found in dataset")
            return []

        # Get user's factor vector and score every movie column
        user_idx = matrix.index.get_loc(user_id)
        user_vector = u_factors[user_idx].reshape(1, -1)
        predicted_ratings = np.dot(user_vector, i_factors).flatten()

        # Keep only the movies the user has not rated yet
        rated_movies = set(
            ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'].tolist()
        )
        predictions = [
            (movie_id, predicted_ratings[col])
            for col, movie_id in enumerate(matrix.columns.tolist())
            if movie_id not in rated_movies
        ]

        if not predictions:
            print(f"User {user_id} has rated all available movies!")
            return []

        # Stable sort: movies with equal scores keep their column order.
        predictions.sort(key=lambda pair: pair[1], reverse=True)
        if N is not None:
            # Clamp so a negative N cannot turn into a "drop the last |N|" slice.
            predictions = predictions[:max(N, 0)]

        # Look up titles only for the movies actually returned.
        top_ids = [movie_id for movie_id, _ in predictions]
        title_rows = movies_df[movies_df['movieId'].isin(top_ids)]
        movie_titles = dict(
            zip(title_rows['movieId'].tolist(), title_rows['title'].tolist())
        )
        return [
            (movie_id, movie_titles.get(movie_id, f"Movie {movie_id}"))
            for movie_id in top_ids
        ]
    except Exception as e:
        # Best-effort helper: report the problem and return no recommendations.
        print(f"Error generating recommendations: {e}")
        return []

# Smoke test: show the top five picks for each of the three sample users
print("🎬 MOVIE RECOMMENDATIONS")
print("=" * 50)

for user_id in (1, 2, 3):
    print(f"\nRecommendations for User {user_id}:")
    recommendations = recommend_movies(user_id, N=5)
    if not recommendations:
        # Empty list: unknown user, nothing left to recommend, or an error.
        print("  No recommendations available")
        continue
    for i, (movie_id, title) in enumerate(recommendations, 1):
        print(f"  {i}. {title} (ID: {movie_id})")

🎬 MOVIE RECOMMENDATIONS

Recommendations for User 1:
  1. Jumanji (1995) (ID: 2)
  2. Waiting to Exhale (1995) (ID: 4)
  3. Sabrina (1995) (ID: 7)
  4. Tom and Huck (1995) (ID: 8)
  5. Sudden Death (1995) (ID: 9)

Recommendations for User 2:
  1. Jumanji (1995) (ID: 2)
  2. Waiting to Exhale (1995) (ID: 4)
  3. Sabrina (1995) (ID: 7)
  4. Tom and Huck (1995) (ID: 8)
  5. Sudden Death (1995) (ID: 9)

Recommendations for User 3:
  1. Toy Story (1995) (ID: 1)
  2. Grumpier Old Men (1995) (ID: 3)
  3. Heat (1995) (ID: 6)
  4. The Usual Suspects (1995) (ID: 47)
  5. Star Wars: Episode IV - A New Hope (1977) (ID: 50)


In [13]:
import joblib

# Bundle the fitted model together with everything the recommendation
# functions need (factors, the pivoted matrix and both source tables).
model_data = dict(
    nmf_model=nmf,
    user_factors=user_factors,
    item_factors=item_factors,
    user_item_matrix=user_item_matrix,
    movies=movies,
    ratings=ratings,
)

joblib.dump(model_data, 'recommender_model.pkl')
print("✅ Model saved successfully as 'recommender_model.pkl'")

# Round trip: read the bundle back and report what it contains
loaded_data = joblib.load('recommender_model.pkl')
print("✅ Model loaded successfully!")
print(f"📊 Model details:")
print(f"   - Model type: {type(loaded_data['nmf_model'])}")
print(f"   - User factors shape: {loaded_data['user_factors'].shape}")
print(f"   - Item factors shape: {loaded_data['item_factors'].shape}")
print(f"   - Movies in dataset: {len(loaded_data['movies'])}")
print(f"   - Ratings in dataset: {len(loaded_data['ratings'])}")
print(f"   - Users in dataset: {loaded_data['ratings']['userId'].nunique()}")

# Test recommendation function with loaded model
def recommend_from_loaded_model(user_id, N=5, model_data=None):
    """Generate recommendations using the loaded model.

    Args:
        user_id (int): ID of the user to recommend movies for.
        N (int): Maximum number of recommendations to return. Zero or a
            negative value yields an empty list; None returns every candidate.
        model_data (dict, optional): Mapping with the keys
            'user_item_matrix', 'user_factors', 'item_factors', 'ratings' and
            'movies'. Defaults to the notebook-level ``loaded_data`` bundle.

    Returns:
        list: (movie_id, title) tuples, highest predicted rating first.
        Empty when the user is unknown (a message is printed), when the user
        has no unrated movies, or when an error occurred (also printed).
    """
    try:
        data = loaded_data if model_data is None else model_data
        matrix = data['user_item_matrix']

        # Report an unknown user explicitly; letting get_loc raise would only
        # print the bare key ("Error: 42"), which does not say what went wrong.
        if user_id not in matrix.index:
            print(f"User {user_id} not found in dataset")
            return []

        # Get user's factor vector and score every movie column
        user_idx = matrix.index.get_loc(user_id)
        user_vector = data['user_factors'][user_idx].reshape(1, -1)
        predicted_ratings = np.dot(user_vector, data['item_factors']).flatten()

        # Keep only the movies the user has not rated yet
        ratings_df = data['ratings']
        rated_movies = set(
            ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'].tolist()
        )
        predictions = [
            (movie_id, predicted_ratings[col])
            for col, movie_id in enumerate(matrix.columns.tolist())
            if movie_id not in rated_movies
        ]

        # Stable sort: movies with equal scores keep their column order.
        predictions.sort(key=lambda pair: pair[1], reverse=True)
        if N is not None:
            # Clamp so a negative N cannot turn into a "drop the last |N|" slice.
            predictions = predictions[:max(N, 0)]

        # Look up titles only for the movies actually returned.
        top_ids = [movie_id for movie_id, _ in predictions]
        movies_df = data['movies']
        title_rows = movies_df[movies_df['movieId'].isin(top_ids)]
        movie_titles = dict(
            zip(title_rows['movieId'].tolist(), title_rows['title'].tolist())
        )
        return [
            (movie_id, movie_titles.get(movie_id, f"Movie {movie_id}"))
            for movie_id in top_ids
        ]
    except Exception as e:
        # Best-effort helper: report the problem and return no recommendations.
        print(f"Error: {e}")
        return []

# One summary line per sample user, built from the reloaded bundle
print(f"\n🎬 Testing recommendations with loaded model:")
for user_id in (1, 2, 3):
    recommendations = recommend_from_loaded_model(user_id, N=3)
    if not recommendations:
        print(f"   User {user_id}: No recommendations available")
        continue
    print(f"   User {user_id}: {', '.join([title for _, title in recommendations])}")


✅ Model saved successfully as 'recommender_model.pkl'
✅ Model loaded successfully!
📊 Model details:
   - Model type: <class 'sklearn.decomposition._nmf.NMF'>
   - User factors shape: (3, 10)
   - Item factors shape: (10, 20)
   - Movies in dataset: 27
   - Ratings in dataset: 30
   - Users in dataset: 3

🎬 Testing recommendations with loaded model:
   User 1: Jumanji (1995), Waiting to Exhale (1995), Sabrina (1995)
   User 2: Jumanji (1995), Waiting to Exhale (1995), Sabrina (1995)
   User 3: Toy Story (1995), Grumpier Old Men (1995), Heat (1995)


In [14]:
# 🚀 Using the Saved Model Independently
# This cell demonstrates how to load and use the saved model in a new session

import joblib
import numpy as np

# Read the saved bundle back from disk.
# NOTE: joblib.load unpickles the file, so only load bundles you trust.
print("📂 Loading saved recommender model...")
model_data = joblib.load('recommender_model.pkl')

# Unpack the bundle into the names the recommendation function reads
nmf_model = model_data['nmf_model']
user_factors, item_factors = model_data['user_factors'], model_data['item_factors']
user_item_matrix = model_data['user_item_matrix']
movies, ratings = model_data['movies'], model_data['ratings']

print("✅ Model loaded successfully!")
print(f"📊 Dataset: {len(movies)} movies, {len(ratings)} ratings, {ratings['userId'].nunique()} users")

# Standalone recommendation function
def get_recommendations(user_id, num_recommendations=5, model_data=None):
    """
    Get movie recommendations for a user using the saved model.

    Args:
        user_id (int): ID of the user
        num_recommendations (int): Maximum number of recommendations to
            return. Zero or a negative value yields an empty list; None
            returns every candidate.
        model_data (dict, optional): Mapping with the keys
            'user_item_matrix', 'user_factors', 'item_factors', 'ratings' and
            'movies'. When omitted, the notebook-level variables of the same
            names are used.

    Returns:
        list | str: On success, a list of dicts with the keys 'movie_id',
        'title' and 'predicted_rating' (a plain float rounded to two
        decimals), best first. On failure, a message string instead: unknown
        user, user has rated every movie, or an unexpected error. Callers
        should check ``isinstance(result, list)``.
    """
    try:
        # Resolve the data sources: explicit bundle first, notebook globals otherwise.
        if model_data is None:
            matrix, u_factors, i_factors = user_item_matrix, user_factors, item_factors
            ratings_df, movies_df = ratings, movies
        else:
            matrix = model_data['user_item_matrix']
            u_factors = model_data['user_factors']
            i_factors = model_data['item_factors']
            ratings_df = model_data['ratings']
            movies_df = model_data['movies']

        # Check if user exists
        if user_id not in matrix.index:
            return f"User {user_id} not found in dataset"

        # Get user's factor vector and score every movie column
        user_idx = matrix.index.get_loc(user_id)
        user_vector = u_factors[user_idx].reshape(1, -1)
        predicted_ratings = np.dot(user_vector, i_factors).flatten()

        # Keep only the movies the user has not rated yet
        rated_movies = set(
            ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'].tolist()
        )
        predictions = [
            (movie_id, predicted_ratings[col])
            for col, movie_id in enumerate(matrix.columns.tolist())
            if movie_id not in rated_movies
        ]

        if not predictions:
            return f"User {user_id} has rated all available movies!"

        # Stable sort: movies with equal scores keep their column order.
        predictions.sort(key=lambda pair: pair[1], reverse=True)
        if num_recommendations is not None:
            # Clamp so a negative count cannot turn into a "drop the last |n|" slice.
            predictions = predictions[:max(num_recommendations, 0)]

        # Look up titles only for the movies actually returned.
        top_ids = [movie_id for movie_id, _ in predictions]
        title_rows = movies_df[movies_df['movieId'].isin(top_ids)]
        movie_titles = dict(
            zip(title_rows['movieId'].tolist(), title_rows['title'].tolist())
        )

        return [
            {
                'movie_id': movie_id,
                'title': movie_titles.get(movie_id, f"Movie {movie_id}"),
                # Plain float (not a numpy scalar) so the result serializes cleanly.
                'predicted_rating': round(float(pred_rating), 2),
            }
            for movie_id, pred_rating in predictions
        ]

    except Exception as e:
        # Best-effort helper: hand the problem back as a message string.
        return f"Error generating recommendations: {e}"

# Exercise the standalone function for the three sample users
print("\n🎬 Testing standalone recommendation function:")
for user_id in (1, 2, 3):
    print(f"\n📋 Recommendations for User {user_id}:")
    recommendations = get_recommendations(user_id, 3)

    # Anything that is not a list is a message string from the function.
    if not isinstance(recommendations, list):
        print(f"   {recommendations}")
        continue
    for i, rec in enumerate(recommendations, 1):
        print(f"   {i}. {rec['title']} (Predicted Rating: {rec['predicted_rating']})")

print("\n🎉 Model is ready for production use!")


📂 Loading saved recommender model...
✅ Model loaded successfully!
📊 Dataset: 27 movies, 30 ratings, 3 users

🎬 Testing standalone recommendation function:

📋 Recommendations for User 1:
   1. Jumanji (1995) (Predicted Rating: 0.0)
   2. Waiting to Exhale (1995) (Predicted Rating: 0.0)
   3. Sabrina (1995) (Predicted Rating: 0.0)

📋 Recommendations for User 2:
   1. Jumanji (1995) (Predicted Rating: 0.0)
   2. Waiting to Exhale (1995) (Predicted Rating: 0.0)
   3. Sabrina (1995) (Predicted Rating: 0.0)

📋 Recommendations for User 3:
   1. Toy Story (1995) (Predicted Rating: 0.0)
   2. Grumpier Old Men (1995) (Predicted Rating: 0.0)
   3. Heat (1995) (Predicted Rating: 0.0)

🎉 Model is ready for production use!
