In [1]:
# Data manipulation and analysis
import pandas as pd 
import numpy as np    

# Data visualization
import matplotlib.pyplot as plt  
import seaborn as sns            
import plotly.exceptions as px  

# Ignore warnings (useful during development to reduce console noise)
import warnings
warnings.filterwarnings("ignore")

# TensorFlow and Keras for deep learning and recommendation systems
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses, metrics  
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau, ModelCheckpoint  
from tensorflow.keras import regularizers 
import tensorflow_recommenders as tfrs  
from tensorflow.keras import metrics as keras_metrics  # To avoid conflicts with sklearn.metrics
from tensorflow_recommenders import metrics as tfrs_metrics


# Scikit-learn for preprocessing, model evaluation, and splitting
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler, MinMaxScaler  
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity 
from sklearn.impute import SimpleImputer  
from sklearn.metrics import mean_squared_error, mean_absolute_error  
from sklearn.metrics import r2_score, precision_score, recall_score, f1_score, ndcg_score  

# Other utility libraries
import tempfile  # For creating temporary directories/files
import os       
import math     
from collections import defaultdict  # For handling dictionary-like structures with default values

# Deployment the model
from tensorflow.keras.models import load_model  # To load a saved Keras model
from keras.saving import register_keras_serializable  # To register custom Keras components for saving/loading
import pickle  # For serializing Python objects (like models or encoders)



In [2]:
class NeuralCollaborativeFilteringDataProcessor:
    def __init__(self, df):
        self.df = df.copy()
        self._preprocess_data()
        
    def _preprocess_data(self):
        """Enhanced data cleaning and preparation"""
        # Robust missing value handling
        self.df["movies_avg_rating"] = (
            self.df["movies_avg_rating"]
            .fillna(self.df.groupby("movieId")["movies_avg_rating"].transform("mean"))
            .fillna(self.df["movies_avg_rating"].mean())
        )

        # Enhanced user statistics
        self.df["user_rating_count"] = self.df.groupby("userId")["user_rating"].transform("count")
        self.df["user_rating_avg"] = self.df.groupby("userId")["user_rating"].transform("mean")
        self.df["user_rating_std"] = (
            self.df.groupby("userId")["user_rating"].transform("std").fillna(0)
        )

        # Create mappings for user and movie IDs
        self.user_ids = self.df['userId'].unique().tolist()
        self.movie_ids = self.df['movieId'].unique().tolist()
        
        self.user_id_lookup = {id: idx for idx, id in enumerate(self.user_ids)}
        self.movie_id_lookup = {id: idx for idx, id in enumerate(self.movie_ids)}
        self.reverse_movie_id_lookup = {idx: id for id, idx in self.movie_id_lookup.items()}
        
        # Encode IDs
        self.df['user_id_encoded'] = self.df['userId'].map(self.user_id_lookup)
        self.df['movie_id_encoded'] = self.df['movieId'].map(self.movie_id_lookup)
        
        # Create movie title mapping
        self.movie_titles = dict(zip(self.df['movieId'], self.df['movie_title']))
        
    def get_train_test_split(self, test_size=0.2, random_state=42):
        """Split data into train and test sets"""
        X = self.df[['user_id_encoded', 'movie_id_encoded']]
        y = self.df['user_rating']
        return train_test_split(X, y, test_size=test_size, random_state=random_state)

In [3]:
class NeuralCollaborativeFilteringModel(tfrs.Model):
    def __init__(self, user_count, movie_count, embedding_dim=64):
        super().__init__()
        
        # User and movie embedding layers
        self.user_embedding = layers.Embedding(
            input_dim=user_count,
            output_dim=embedding_dim,
            embeddings_regularizer=regularizers.l2(1e-6)
        )
        self.movie_embedding = layers.Embedding(
            input_dim=movie_count,
            output_dim=embedding_dim,
            embeddings_regularizer=regularizers.l2(1e-6)
        )
        
        # Neural network layers
        self.mlp_layers = tf.keras.Sequential([
            layers.Dense(256, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(128, activation='relu'),
            layers.Dense(1)
        ])
        
        # Rating prediction task
        self.task = tfrs.tasks.Ranking(
            loss=losses.MeanSquaredError(),
            metrics=[
                tf.keras.metrics.RootMeanSquaredError(),
                tf.keras.metrics.MeanAbsoluteError()
            ]
        )
        
    def call(self, features):
        user_embeddings = self.user_embedding(features["user_id_encoded"])
        movie_embeddings = self.movie_embedding(features["movie_id_encoded"])
        concatenated = tf.concat([user_embeddings, movie_embeddings], axis=1)
        return self.mlp_layers(concatenated)
        
    def compute_loss(self, inputs, training=False):
        # Create a copy of the features instead of modifying inputs
        features = {
            "user_id_encoded": inputs["user_id_encoded"],
            "movie_id_encoded": inputs["movie_id_encoded"]
        }
        labels = inputs["user_rating"]  # Don't pop, just access
        
        predictions = self(features)
        loss = self.task(labels=labels, predictions=predictions)
        regularization_loss = tf.reduce_sum(self.losses)
        return loss + 1e-6 * regularization_loss

In [4]:
class NeuralCollaborativeFilteringSystem:  
    def __init__(self, data_processor):
        self.data_processor = data_processor
        self.model = None
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.train_ds = None
        self.test_ds = None
        self.original_test_df = None

    def recommend_for_user(self, user_id, top_n=10, diversity=0.2):
        """Generate recommendations for a user with diversity"""
        try:
            if user_id not in self.data_processor.user_id_lookup:
                return self.get_popular_recommendations(top_n)
                
            user_encoded = self.data_processor.user_id_lookup[user_id]
            seen_movies = set(self.data_processor.df[self.data_processor.df['userId'] == user_id]['movieId'])
            
            # Get all unseen movies
            candidate_movies = [mid for mid in self.data_processor.movie_ids if mid not in seen_movies]
            movie_encoded = [self.data_processor.movie_id_lookup[mid] for mid in candidate_movies]
            
            # Batch predictions to avoid memory issues
            batch_size = 1000
            predictions = []
            for i in range(0, len(movie_encoded), batch_size):
                batch = movie_encoded[i:i+batch_size]
                user_batch = [user_encoded] * len(batch)
                
                preds = self.model({
                    "user_id_encoded": tf.convert_to_tensor(user_batch),
                    "movie_id_encoded": tf.convert_to_tensor(batch)
                }).numpy().flatten()
                
                predictions.extend(preds)
            
            predictions = np.array(predictions)
            
            # Add diversity
            if diversity > 0:
                noise = np.random.normal(0, diversity * np.std(predictions), len(predictions))
                predictions = predictions + noise
                
            # Get top recommendations
            top_indices = np.argsort(-predictions)[:top_n*3]
            candidates = pd.DataFrame({
                'movieId': np.array(candidate_movies)[top_indices],
                'predicted_rating': predictions[top_indices]
            })
            
            # Merge with movie info
            candidates = candidates.merge(
                self.data_processor.df[['movieId', 'movie_title', 'genres', 'movies_avg_rating']].drop_duplicates(),
                on='movieId'
            )
            
            # Deduplicate and diversify by genres
            final_recs = []
            seen_genres = set()
            for _, row in candidates.iterrows():
                movie_genres = set(row['genres'].split('|')) if pd.notna(row['genres']) else set()
                
                if len(seen_genres & movie_genres) / len(movie_genres) > 0.7:
                    continue
                    
                final_recs.append({
                    "movieId": row['movieId'],
                    "title": row['movie_title'],
                    "genres": row['genres'],
                    "predicted_rating": float(row['predicted_rating']),
                    "average_rating": row['movies_avg_rating']
                })
                seen_genres.update(movie_genres)
                
                if len(final_recs) >= top_n:
                    break
                    
            return pd.DataFrame(final_recs)
            
        except Exception as e:
            print(f"Error in recommendations for user {user_id}: {str(e)}")
            return self.get_popular_recommendations(top_n)
            
    def get_popular_recommendations(self, top_n=10):
        """Fallback to popular items when personalized recs fail"""
        try:
            popular = self.data_processor.df.groupby('movieId').agg({
                'movie_title': 'first',
                'genres': 'first',
                'movies_avg_rating': 'mean',
                'user_rating': 'count'
            }).sort_values(['user_rating', 'movies_avg_rating'], ascending=False).head(top_n)
            
            return pd.DataFrame([{
                "movieId": idx,
                "title": row['movie_title'],
                "genres": row['genres'],
                "predicted_rating": row['movies_avg_rating'],
                "average_rating": row['movies_avg_rating'],
                "popularity": row['user_rating']
            } for idx, row in popular.iterrows()])
            
        except Exception as e:
            print(f"Error in popular recommendations: {str(e)}")
            return pd.DataFrame([{
                "movieId": -1,
                "title": "No recommendations available",
                "genres": "",
                "predicted_rating": 0,
                "average_rating": "N/A"
            }])
            
    @classmethod
    def load_for_deployment(cls, data_processor, base_path="ncf_deployment"):
        """Load a saved model for deployment"""
        recommender = cls(data_processor)
        
        # Load model weights
        user_count = len(data_processor.user_ids)
        movie_count = len(data_processor.movie_ids)
        
        recommender.model = NeuralCollaborativeFilteringModel(user_count, movie_count)
        recommender.model.load_weights(os.path.join(base_path, "model_weights.weights.h5"))
        
        # Load other assets
        with open(os.path.join(base_path, "deployment_assets.pkl"), 'rb') as f:
            assets = pickle.load(f)
            recommender.scaler = assets['scaler']
            data_processor.user_id_lookup = assets['user_id_lookup']
            data_processor.movie_id_lookup = assets['movie_id_lookup']
            data_processor.reverse_movie_id_lookup = assets['reverse_movie_id_lookup']
            data_processor.movie_titles = assets['movie_titles']
            data_processor.user_ids = assets['user_ids']
            data_processor.movie_ids = assets['movie_ids']
            
        return recommender

In [5]:
df = pd.read_csv("df1.csv")

In [6]:
# select subset of data with user_rating_count > 20
df2 = df[df['user_rating_count'] > 20]

In [7]:
# Initialize the data processor for Neural Collaborative Filtering
data_processor = NeuralCollaborativeFilteringDataProcessor(df2)

## 6. Deployment Architecture

In [8]:
# Load the trained Neural Content Based Filtering model and related components from the "my_recommender_deployment" directory
# This allows reuse of the model without retraining
loaded_recommender = NeuralCollaborativeFilteringSystem.load_for_deployment(
    data_processor, "ncf_deployment"
)

In [9]:
loaded_recommender.recommend_for_user(139143,5)

Unnamed: 0,movieId,title,genres,predicted_rating,average_rating
0,1417,"Portrait of a Lady, The",Drama,0.075001,3.38
1,68159,State of Play,Crime|Drama|Thriller,0.072879,3.59
2,6720,Kuffs,Action|Comedy|Crime,0.069061,2.69
3,84894,Run of the Arrow,Western,0.066655,4.0
4,87234,Submarine,Comedy|Drama|Romance,0.065923,3.62


In [None]:
import gradio as gr
import requests
from urllib.parse import quote

# TMDB API Configuration (same as before)
TMDB_API_KEY = "c4b76d5deff8af9434b07dde68c7c158"
TMDB_BASE_URL = "https://api.themoviedb.org/3"
POSTER_BASE_URL = "https://image.tmdb.org/t/p/w200"


def get_movie_poster(movie_title, year=None):
    """Fetch movie poster URL from TMDB API"""
    try:
        encoded_title = quote(movie_title)
        search_url = (
            f"{TMDB_BASE_URL}/search/movie?api_key={TMDB_API_KEY}&query={encoded_title}"
        )
        if year:
            search_url += f"&year={year}"

        response = requests.get(search_url)
        response.raise_for_status()
        data = response.json()

        if data.get("results"):
            for movie in data["results"]:
                if movie.get("poster_path"):
                    return f"{POSTER_BASE_URL}{movie['poster_path']}"
    except Exception as e:
        print(f"Error fetching poster for {movie_title}: {str(e)}")
    return None


def movie_card(movie_title, genres, rating, year=None):
    """Create a styled movie card with poster"""
    poster_url = get_movie_poster(movie_title, year)

    card = f"""
    <div style="
        display: flex; 
        background: #2d2d2d;
        border-radius: 10px;
        overflow: hidden;
        margin: 10px 0;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
        height: 150px;
        width: 100%;
    ">
    """

    if poster_url:
        card += f"""
        <img src="{poster_url}" style="
            height: 150px; 
            width: 100px; 
            object-fit: cover;
        ">
        """
    else:
        card += """
        <div style="
            height: 150px; 
            width: 100px; 
            background: #1f1f1f; 
            display: flex; 
            align-items: center; 
            justify-content: center;
        ">
            <span style="color: #555; font-size: 12px;">No poster</span>
        </div>
        """

    card += f"""
        <div style="padding: 15px; flex-grow: 1;">
            <h3 style="margin: 0; color: #fff; font-weight: 600;">{movie_title}</h3>
            <p style="margin: 5px 0; color: #aaa; font-size: 14px;">{genres}</p>
            <div style="display: flex; align-items: center; margin-top: 10px;">
                <span style="color: #ffb700; font-weight: bold;">⭐ {rating:.1f}</span>
    """

    if year:
        card += f"""
                <span style="margin-left: 15px; color: #888; font-size: 13px;">{year}</span>
        """

    card += """
            </div>
        </div>
    </div>
    """

    return card


def create_recommender_interface(loaded_recommender):
    """Create Gradio interface for the NCF recommender"""
    try:
        # Get movie titles from the recommender's data
        movie_options = (
            loaded_recommender.movie_features["movie_title"].dropna().unique().tolist()
        )
        if not movie_options:
            movie_options = ["No movies available"]
    except Exception as e:
        print(f"Error loading movie options: {str(e)}")
        movie_options = ["Error loading movies"]

    with gr.Blocks(
        theme=gr.themes.Default(
            primary_hue="orange",
            secondary_hue="amber",
            neutral_hue="slate",
            font=[gr.themes.GoogleFont("Poppins")],
        ),
        title="NCF Movie Recommender",
    ) as interface:

        gr.Markdown(
            """
        <div style="text-align: center;">
            <h1 style="color: #ffb700;">🎬 Neural Collaborative Filtering</h1>
            <p style="color: #aaa;">Movie recommendations powered by deep learning</p>
        </div>
        """
        )

        with gr.Tabs():
            with gr.TabItem("User Recommendations"):
                with gr.Row():
                    with gr.Column():
                        user_id = gr.Number(
                            label="User ID",
                            value=139143,  # Your example user ID
                            precision=0,
                        )
                        num_recs = gr.Slider(
                            label="Number of Recommendations",
                            minimum=1,
                            maximum=20,
                            step=1,
                            value=5,
                        )
                        recommend_btn = gr.Button(
                            "Get Recommendations", variant="primary"
                        )
                    with gr.Column():
                        user_output = gr.HTML()

                recommend_btn.click(
                    fn=lambda uid, num: generate_user_recommendations(
                        loaded_recommender, uid, num
                    ),
                    inputs=[user_id, num_recs],
                    outputs=user_output,
                )


    return interface


def generate_user_recommendations(loaded_recommender, user_id, num_recs):
    """Generate recommendations for a user using your NCF system"""
    try:
        user_id = int(user_id)
        if user_id < 1:
            return "❌ Please enter a valid User ID (positive integer)"

        # Use your loaded recommender
        recs = loaded_recommender.recommend_for_user(user_id=user_id, top_n=num_recs)

        if recs is None or recs.empty:
            return "❌ No recommendations found for this user"

        result = (
            "<h2 style='color: #ffb700; margin-bottom: 20px;'>Recommended Movies:</h2>"
        )
        for _, row in recs.iterrows():
            result += movie_card(
                row["title"],
                row.get("genres", ""),
                float(row.get("average_rating", 0)),
                row.get("year", None),
            )
        return result

    except Exception as e:
        return f"❌ Error: {str(e)}"

In [None]:
# from pyngrok import ngrok

# if __name__ == "__main__":
#     interface = create_recommender_interface(loaded_recommender)
#     public_url = ngrok.connect(7860)  # Forward port 7860
#     print("Public URL (Share this):", public_url)
#     interface.launch()

In [None]:
# Usage example
if __name__ == "__main__":
    # Load your recommender system (as in your example)
    loaded_recommender = NeuralCollaborativeFilteringSystem.load_for_deployment(
        data_processor, "ncf_deployment"
    )

    # Create and launch interface
    interface = create_recommender_interface(loaded_recommender)
    interface.launch()

Error loading movie options: 'NeuralCollaborativeFilteringSystem' object has no attribute 'movie_features'
* Running on local URL:  http://127.0.0.1:7870
* To create a public link, set `share=True` in `launch()`.


In [None]:
data_processor.df.sample(5)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,userId,movieId,user_rating,movie_title,user_rating_count,user_rating_avg,genres,(no genres listed),...,movies_avg_rating,year,month,day,hour,minute,movie_year,user_rating_std,user_id_encoded,movie_id_encoded
575853,575855,575855,262449,4992,4,Kate & Leopold,33,3.0,Comedy|Romance,0,...,3.07,2017,2,4,18,46,2001.0,0.612372,7317,2856
440,440,440,130759,104881,2,Out of the Furnace (Dust to Dust) (Low Dweller...,66,3.090909,Drama|Thriller,0,...,3.45,2013,12,11,0,55,2013.0,0.779098,134,134
673902,673904,673904,13428,3259,3,Far and Away,35,2.657143,Adventure|Drama|Romance,0,...,3.21,2004,7,30,22,48,1992.0,1.258918,2963,2154
78643,78643,78643,218233,1617,4,L.A. Confidential,27,3.074074,Crime|Film-Noir|Mystery|Thriller,0,...,4.06,2004,11,20,22,40,1997.0,1.206582,2721,3217
561958,561960,561960,214027,95510,2,"Amazing Spider-Man, The",24,2.958333,Action|Adventure|Sci-Fi|IMAX,0,...,3.23,2023,7,18,12,7,2012.0,0.858673,723,1745
