In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Multiply
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# 1. Data Processing
class DataProcessor:
    def __init__(self, content_data, user_data, interaction_data):
        self.content_data = content_data
        self.user_data = user_data
        self.interaction_data = interaction_data
        self.content_embeddings = None

    def get_user_profile(self, user_id):
        # Placeholder implementation
        return np.random.rand(1, 100)  # Return a random vector as a user profile

    def get_content_embeddings(self):
        # Placeholder implementation
        if self.content_embeddings is None:
            self.content_embeddings = np.random.rand(len(self.content_data), 100)
        return self.content_embeddings

In [3]:
# 2. Recommendation Models
class HybridRecommender:
    def __init__(self, n_factors=50, n_layers=3, reg_layers=[0, 0, 0], reg_mf=0):
        self.n_factors = n_factors
        self.n_layers = n_layers
        self.reg_layers = reg_layers
        self.reg_mf = reg_mf
        self.model = None

    def build_model(self, n_users, n_items):
        user_input = Input(shape=(1,), dtype='int32', name='user_input')
        item_input = Input(shape=(1,), dtype='int32', name='item_input')

        MF_Embedding_User = Embedding(input_dim=n_users, output_dim=self.n_factors, name='mf_embedding_user',
                                      embeddings_initializer='he_normal', embeddings_regularizer=l2(self.reg_mf),
                                      input_length=1)
        MF_Embedding_Item = Embedding(input_dim=n_items, output_dim=self.n_factors, name='mf_embedding_item',
                                      embeddings_initializer='he_normal', embeddings_regularizer=l2(self.reg_mf),
                                      input_length=1)

        MLP_Embedding_User = Embedding(input_dim=n_users, output_dim=self.n_factors, name='mlp_embedding_user',
                                       embeddings_initializer='he_normal', embeddings_regularizer=l2(self.reg_layers[0]),
                                       input_length=1)
        MLP_Embedding_Item = Embedding(input_dim=n_items, output_dim=self.n_factors, name='mlp_embedding_item',
                                       embeddings_initializer='he_normal', embeddings_regularizer=l2(self.reg_layers[0]),
                                       input_length=1)

        mf_user_latent = Flatten()(MF_Embedding_User(user_input))
        mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
        mf_vector = Multiply()([mf_user_latent, mf_item_latent])

        mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
        mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
        mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])

        for idx in range(1, self.n_layers):
            layer = Dense(self.reg_layers[idx], activation='relu', name=f'layer_{idx}',
                          kernel_regularizer=l2(self.reg_layers[idx]))
            mlp_vector = layer(mlp_vector)

        predict_vector = Concatenate()([mf_vector, mlp_vector])
        prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name='prediction')(predict_vector)

        self.model = Model(inputs=[user_input, item_input], outputs=prediction)
        return self.model

    def predict(self, user_id):
        if self.model is None:
            raise ValueError("Model has not been built. Call build_model first.")

        try:
            n_items = self.model.input[1].shape[1]  # Get the number of items from the model's input shape
        except AttributeError:
            print("Error: Model input shape is not as expected. Using a default value.")
            n_items = 1000  # Use a default value, adjust as needed

        all_items = np.arange(n_items)
        user_input = np.full(n_items, user_id)

        try:
            predictions = self.model.predict([user_input, all_items])
            item_scores = [(item_id, score[0]) for item_id, score in zip(all_items, predictions)]
            return sorted(item_scores, key=lambda x: x[1], reverse=True)
        except Exception as e:
            print(f"Error during prediction: {e}")
            print(f"User input shape: {user_input.shape}")
            print(f"All items shape: {all_items.shape}")
            print(f"Model input shapes: {[i.shape for i in self.model.inputs]}")
            return []

In [4]:
# 3. Training and Evaluation
class ModelTrainer:
    def __init__(self, model, data):
        self.model = model
        self.data = data

    def train(self, epochs=20, batch_size=256):
        self.model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])
        history = self.model.fit([self.data.user_input, self.data.item_input], self.data.labels,
                                 epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=2)
        return history

In [5]:
# 4. Real-time Update System
class RealTimeUpdater:
    def __init__(self, model, data_processor):
        self.model = model
        self.data_processor = data_processor

    def update_user_preferences(self, user_id, interaction_data):
        print(f"Updating preferences for user {user_id}")
        # Placeholder: Update user preferences based on new interactions
        pass

    def update_content_features(self, new_content):
        print(f"Updating content features for {len(new_content)} new items")
        # Placeholder: Process and add new content to the system
        pass

    def adjust_recommendations(self, user_id, recommendations):
        print(f"Applying real-time adjustments for user {user_id}")
        # Placeholder: Apply any last-minute adjustments to recommendations
        return recommendations


In [6]:
# 5. Main Recommendation Pipeline
class RecommendationSystem:
    def __init__(self, data_processor, recommender, trainer, real_time_updater):
        self.data_processor = data_processor
        self.recommender = recommender
        self.trainer = trainer
        self.real_time_updater = real_time_updater

    def get_recommendations(self, user_id, top_n=10):
        try:
            cf_recs = self.recommender.predict(user_id)
            cb_recs = self.content_based_recommendations(user_id)
            combined_recs = self.combine_recommendations(cf_recs, cb_recs)
            final_recs = self.real_time_updater.adjust_recommendations(user_id, combined_recs)
            return final_recs[:top_n]
        except Exception as e:
            print(f"Error in get_recommendations: {e}")
            return []

    def content_based_recommendations(self, user_id):
        user_profile = self.data_processor.get_user_profile(user_id)
        content_embeddings = self.data_processor.get_content_embeddings()
        content_similarities = cosine_similarity(user_profile, content_embeddings)
        content_scores = content_similarities.flatten()
        top_indices = content_scores.argsort()[::-1]
        return [(i, content_scores[i]) for i in top_indices]  # Return index instead of content_id

    def combine_recommendations(self, cf_recs, cb_recs, cf_weight=0.7, cb_weight=0.3):
        combined = {}
        for content_id, score in cf_recs:
            combined[content_id] = score * cf_weight
        for content_id, score in cb_recs:
            if content_id in combined:
                combined[content_id] += score * cb_weight
            else:
                combined[content_id] = score * cb_weight
        return sorted(combined.items(), key=lambda x: x[1], reverse=True)

    def train_model(self, epochs=20, batch_size=256):
        print("Starting model training...")
        history = self.trainer.train(epochs=epochs, batch_size=batch_size)
        print("Model training completed.")
        return history

    def update_system(self, new_data):
        print("Updating system with new data...")
        self.real_time_updater.update_user_preferences(new_data['user_id'], new_data['interactions'])
        self.real_time_updater.update_content_features(new_data['new_content'])
        print("System update completed.")

    def evaluate_recommendations(self, test_data):
        print("Evaluating recommendation quality...")
        # Placeholder: Implement evaluation metrics (e.g., precision, recall, NDCG)
        pass

In [7]:
# Usage example
if __name__ == "__main__":
    # Initialize components with dummy data
    n_users, n_items = 1000, 1000
    dummy_content_data = pd.DataFrame({'content_id': range(n_items), 'title': [f'Item {i}' for i in range(n_items)]})
    dummy_user_data = pd.DataFrame({'user_id': range(n_users)})
    dummy_interaction_data = pd.DataFrame({'user_id': [], 'content_id': [], 'interaction': []})

    data_processor = DataProcessor(dummy_content_data, dummy_user_data, dummy_interaction_data)
    recommender = HybridRecommender()
    model = recommender.build_model(n_users, n_items)
    trainer = ModelTrainer(model, None)  # None instead of processed_data
    real_time_updater = RealTimeUpdater(model, data_processor)

    # Create recommendation system
    rec_system = RecommendationSystem(data_processor, recommender, trainer, real_time_updater)

    # Get recommendations for a user
    user_id = 123
    recommendations = rec_system.get_recommendations(user_id, top_n=10)
    print(f"Top 10 recommendations for user {user_id}:")
    for content_id, score in recommendations:
        print(f"Content ID: {content_id}, Score: {score}")

    # Update system with new data
    new_data = {
        'user_id': 123,
        'interactions': [{'content_id': 456, 'interaction_type': 'view', 'duration': 300}],
        'new_content': [{'content_id': 789, 'title': 'New Video', 'description': 'A brand new video'}]
    }
    rec_system.update_system(new_data)

    # Note: We're skipping the actual training and evaluation steps in this example
    # as they would require more complete data and model setup

Applying real-time adjustments for user 123
Top 10 recommendations for user 123:
Content ID: 0, Score: 0.5772477276848731
Content ID: 172, Score: 0.2527714495930758
Content ID: 447, Score: 0.2506938701024575
Content ID: 103, Score: 0.24975590759297636
Content ID: 916, Score: 0.24873586429505953
Content ID: 3, Score: 0.24857641644377532
Content ID: 158, Score: 0.24814219313661456
Content ID: 391, Score: 0.24789772511493313
Content ID: 575, Score: 0.2474255239839387
Content ID: 97, Score: 0.2471853904603532
Updating system with new data...
Updating preferences for user 123
Updating content features for 1 new items
System update completed.
