<a href="https://colab.research.google.com/github/minhaj-mhd/two-tower-recommedation/blob/main/two_tower_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#install packages


In [1]:
print("⏳ Installing and upgrading all required packages...")

%pip install --upgrade -q tensorflow tensorflow-recommenders tf-keras tensorflow-text
%pip install -q faiss-cpu

print("\n✅ All packages have been installed and upgraded.")

⏳ Installing and upgrading all required packages...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.9/644.9 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m64.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m67.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m86.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m83.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.11.0 requires tensorflow==

In [2]:
%pip install --upgrade -q tensorflow-decision-forests


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.1/16.1 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import tf_keras
import faiss
import tensorflow_text as tf_text
import tensorflow_decision_forests as tfdf

print(f"tensorflow: {tf.__version__}")
print(f"tensorflow-recommenders: {tfrs.__version__}")
print(f"tf-keras: {tf_keras.__version__}")
print(f"faiss-cpu: {faiss.__version__}")
print(f"tensorflow-text: {tf_text.__version__}")
print(f"tensorflow-decision-forests: {tfdf.__version__}")

tensorflow: 2.19.0
tensorflow-recommenders: v0.7.3
tf-keras: 2.19.0
faiss-cpu: 1.11.0
tensorflow-text: 2.19.0
tensorflow-decision-forests: 1.12.0




# Two tower model

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers import TextVectorization
import faiss
from collections import defaultdict



In [5]:
# --- Step 1: Fabricate Data with 20 Categories and Enhanced User Demographics ---
print("[1] Fabricating data with 20 categories and enhanced user demographics...")

# 20 diverse categories
categories = [
    "electronics", "clothing", "books", "home_garden", "sports_outdoors",
    "beauty_health", "automotive", "toys_games", "jewelry", "music",
    "movies_tv", "kitchen_dining", "office_supplies", "pet_supplies", "crafts",
    "industrial", "grocery", "baby_products", "shoes", "watches"
]

# User demographics data
age_groups = ["18-24", "25-34", "35-44", "45-54", "55-64", "65+"]
locations = ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix", "Philadelphia",
            "San Antonio", "San Diego", "Dallas", "San Jose", "Austin", "Jacksonville",
            "Fort Worth", "Columbus", "Charlotte", "Seattle", "Denver", "Boston"]
genders = ["Male", "Female", "Other"]

num_items = 10000  # Increased items to accommodate more categories
num_users = 1000   # Increased users for better diversity

# Create items with explicit category tracking
item_titles = [f"Product {i}" for i in range(num_items)]
item_categories = [categories[i % len(categories)] for i in range(num_items)]

# Create diverse descriptions based on category
description_templates = {
    "electronics": [
        "High-tech electronic device with advanced features",
        "Smart gadget with wireless connectivity and AI integration",
        "Innovative electronic tool for modern digital life",
        "Premium electronic device with cutting-edge technology"
    ],
    "clothing": [
        "Fashionable apparel made from quality sustainable materials",
        "Comfortable and stylish garment for everyday wear",
        "Trendy clothing with modern design and premium fabric",
        "Versatile wardrobe piece suitable for various occasions"
    ],
    "books": [
        "Educational book covering important academic topics",
        "Engaging literature for avid readers and book enthusiasts",
        "Informative guide with practical knowledge and insights",
        "Bestselling book with compelling storytelling and research"
    ],
    "home_garden": [
        "Durable home improvement tool for DIY projects",
        "Garden equipment for landscaping and plant care",
        "Home decor item to enhance living space aesthetics",
        "Functional household item for daily convenience"
    ],
    "sports_outdoors": [
        "Professional sports equipment for athletic performance",
        "Outdoor gear for adventure and recreational activities",
        "Fitness equipment for home workout routines",
        "Camping and hiking essentials for outdoor enthusiasts"
    ],
    "beauty_health": [
        "Premium skincare product with natural ingredients",
        "Health supplement for wellness and vitality",
        "Cosmetic item for beauty enhancement and self-care",
        "Personal care product for daily hygiene routine"
    ],
    "automotive": [
        "High-quality automotive part for vehicle maintenance",
        "Car accessory for enhanced driving experience",
        "Professional-grade tool for automotive repair",
        "Vehicle enhancement product for performance optimization"
    ],
    "toys_games": [
        "Educational toy for children's development and learning",
        "Board game for family entertainment and bonding",
        "Creative plaything that sparks imagination and creativity",
        "Interactive game for skill development and fun"
    ],
    "jewelry": [
        "Elegant jewelry piece crafted with precious metals",
        "Stylish accessory for fashion and personal expression",
        "Handcrafted jewelry with unique design elements",
        "Luxury jewelry item for special occasions"
    ],
    "music": [
        "Professional music equipment for audio production",
        "Musical instrument for creative expression and performance",
        "High-quality audio device for music enthusiasts",
        "Music accessory for enhanced listening experience"
    ],
    "movies_tv": [
        "Entertainment media for leisure and relaxation",
        "Classic film collection for movie enthusiasts",
        "TV series with compelling storylines and characters",
        "Documentary content for educational entertainment"
    ],
    "kitchen_dining": [
        "Professional kitchen utensil for culinary excellence",
        "Dining accessory for elegant meal presentation",
        "Cooking tool made from premium food-safe materials",
        "Kitchen gadget for efficient food preparation"
    ],
    "office_supplies": [
        "Professional office equipment for workplace productivity",
        "Stationery item for organization and documentation",
        "Ergonomic office accessory for comfort and efficiency",
        "Business tool for professional operations"
    ],
    "pet_supplies": [
        "Pet care product for animal health and happiness",
        "Pet toy for entertainment and exercise",
        "Pet accessory for comfort and safety",
        "Pet nutrition product for optimal health"
    ],
    "crafts": [
        "Art supply for creative projects and expression",
        "Craft material for DIY projects and hobbies",
        "Creative tool for artistic endeavors and crafting",
        "Handcraft supply for personalized creations"
    ],
    "industrial": [
        "Industrial equipment for manufacturing and production",
        "Heavy-duty tool for professional industrial use",
        "Machinery component for industrial operations",
        "Professional-grade equipment for industrial applications"
    ],
    "grocery": [
        "Premium food product for nutritious meals",
        "Organic ingredient for healthy cooking",
        "Gourmet food item for culinary excellence",
        "Essential grocery item for daily nutrition"
    ],
    "baby_products": [
        "Safe baby product for infant care and development",
        "Baby accessory for comfort and convenience",
        "Child safety item for protection and security",
        "Developmental toy for early childhood learning"
    ],
    "shoes": [
        "Comfortable footwear for daily wear and activities",
        "Athletic shoe for sports and fitness activities",
        "Fashion shoe for style and personal expression",
        "Professional footwear for workplace requirements"
    ],
    "watches": [
        "Precision timepiece with advanced features",
        "Luxury watch for style and status",
        "Sports watch for active lifestyle tracking",
        "Smart watch with digital connectivity features"
    ]
}

item_descriptions = []
for i in range(num_items):
    category = item_categories[i]
    template = description_templates[category][i % len(description_templates[category])]
    item_descriptions.append(f"{template}. Model v{i % 15}. Item #{i}.")

items_data = {
    "item_id": [str(i) for i in range(num_items)],
    "item_title": item_titles,
    "item_description": item_descriptions,
    "category": item_categories
}
items_df = pd.DataFrame(items_data)

# Create category-to-items mapping for easier lookup
category_to_items = defaultdict(list)
for idx, row in items_df.iterrows():
    category_to_items[row['category']].append(row['item_id'])

# Generate enhanced user demographics data
print("Generating enhanced user demographics...")
user_demographics = []
for user_id in range(num_users):
    # Generate demographics with realistic distributions
    age_group = np.random.choice(age_groups, p=[0.15, 0.25, 0.22, 0.18, 0.12, 0.08])  # Weighted towards younger users
    location = np.random.choice(locations)
    gender = np.random.choice(genders, p=[0.48, 0.50, 0.02])  # Realistic gender distribution

    user_demographics.append({
        "user_id": str(user_id),
        "age_group": age_group,
        "location": location,
        "gender": gender
    })

users_df = pd.DataFrame(user_demographics)

# Generate user interactions with demographic influence on preferences
print("Generating user interactions with demographic-influenced preferences...")
user_interactions = []
user_categories = {}  # Track which categories each user prefers

# Define demographic preferences (realistic patterns)
demographic_preferences = {
    "age_group": {
        "18-24": {"electronics": 1.5, "clothing": 1.4, "music": 1.3, "toys_games": 1.2},
        "25-34": {"electronics": 1.3, "home_garden": 1.2, "clothing": 1.2, "automotive": 1.1},
        "35-44": {"home_garden": 1.4, "automotive": 1.3, "office_supplies": 1.2, "baby_products": 1.5},
        "45-54": {"home_garden": 1.3, "automotive": 1.2, "books": 1.2, "kitchen_dining": 1.1},
        "55-64": {"books": 1.3, "home_garden": 1.2, "kitchen_dining": 1.2, "beauty_health": 1.1},
        "65+": {"books": 1.4, "home_garden": 1.3, "beauty_health": 1.2, "kitchen_dining": 1.1}
    },
    "gender": {
        "Male": {"electronics": 1.3, "automotive": 1.4, "sports_outdoors": 1.3, "tools": 1.2},
        "Female": {"clothing": 1.4, "beauty_health": 1.5, "jewelry": 1.3, "baby_products": 1.2},
        "Other": {"clothing": 1.1, "electronics": 1.1, "books": 1.2, "music": 1.1}
    }
}

for user_id in range(num_users):
    user_demo = users_df[users_df['user_id'] == str(user_id)].iloc[0]

    # Calculate category preferences based on demographics
    category_scores = {}
    for category in categories:
        base_score = 1.0

        # Age group influence
        age_prefs = demographic_preferences["age_group"].get(user_demo['age_group'], {})
        age_multiplier = age_prefs.get(category, 1.0)

        # Gender influence
        gender_prefs = demographic_preferences["gender"].get(user_demo['gender'], {})
        gender_multiplier = gender_prefs.get(category, 1.0)

        # Combine influences
        category_scores[category] = base_score * age_multiplier * gender_multiplier

    # Select 3-5 categories based on weighted preferences
    num_categories = np.random.randint(3, 6)
    category_weights = np.array([category_scores[cat] for cat in categories])
    category_weights = category_weights / np.sum(category_weights)  # Normalize

    preferred_categories = np.random.choice(categories, size=num_categories, replace=False, p=category_weights)
    user_categories[str(user_id)] = preferred_categories

    # Generate interactions for each preferred category
    for category in preferred_categories:
        # Random number of interactions per category (1-5)
        num_interactions = np.random.randint(1, 6)  # 1 to 5 interactions

        # Select random items from this category
        available_items = category_to_items[category]
        selected_items = np.random.choice(available_items, size=num_interactions, replace=True)

        for item_id in selected_items:
            user_interactions.append({
                "user_id": str(user_id),
                "item_id": item_id
            })

interactions_df = pd.DataFrame(user_interactions)

# Calculate statistics
total_interactions = len(interactions_df)
avg_interactions_per_user = total_interactions / num_users

print(f"Generated {len(items_df)} items across {len(categories)} categories")
print(f"Generated {total_interactions} interactions from {num_users} users")
print(f"Average interactions per user: {avg_interactions_per_user:.1f}")

# Display demographic distribution
print(f"\nUser demographic distribution:")
print("Age groups:")
for age_group in age_groups:
    count = len(users_df[users_df['age_group'] == age_group])
    print(f"  {age_group}: {count} users ({count/num_users:.1%})")

print("Gender distribution:")
for gender in genders:
    count = len(users_df[users_df['gender'] == gender])
    print(f"  {gender}: {count} users ({count/num_users:.1%})")

# Display sample user profiles
print("\nSample user profiles:")
for i in range(5):
    user_id = str(i)
    user_demo = users_df[users_df['user_id'] == user_id].iloc[0]
    cats = user_categories[user_id]
    user_interactions_count = len(interactions_df[interactions_df['user_id'] == user_id])
    print(f"User {user_id}: {user_demo['age_group']}, {user_demo['gender']}, {user_demo['location']}")
    print(f"  Categories: {list(cats)} ({user_interactions_count} interactions)")

items_ds = tf.data.Dataset.from_tensor_slices(dict(items_df))


[1] Fabricating data with 20 categories and enhanced user demographics...
Generating enhanced user demographics...
Generating user interactions with demographic-influenced preferences...
Generated 10000 items across 20 categories
Generated 12405 interactions from 1000 users
Average interactions per user: 12.4

User demographic distribution:
Age groups:
  18-24: 144 users (14.4%)
  25-34: 250 users (25.0%)
  35-44: 219 users (21.9%)
  45-54: 181 users (18.1%)
  55-64: 114 users (11.4%)
  65+: 92 users (9.2%)
Gender distribution:
  Male: 471 users (47.1%)
  Female: 501 users (50.1%)
  Other: 28 users (2.8%)

Sample user profiles:
User 0: 35-44, Male, San Jose
  Categories: [np.str_('electronics'), np.str_('music'), np.str_('books'), np.str_('industrial')] (13 interactions)
User 1: 65+, Male, Denver
  Categories: [np.str_('kitchen_dining'), np.str_('sports_outdoors'), np.str_('books')] (6 interactions)
User 2: 25-34, Male, Phoenix
  Categories: [np.str_('electronics'), np.str_('shoes'), n

In [6]:
# --- Step 2: Enhanced Item Tower with Category Integration ---
print("\n[2] Building and training the enhanced Item Tower with category integration...")
embedding_dimension = 128
max_tokens = 15_000
sequence_length = 120

# Create text vectorizers for both description and category
description_vectorizer = TextVectorization(
    max_tokens=max_tokens,
    output_sequence_length=sequence_length,
    name="description_vectorizer"
)
description_vectorizer.adapt(items_ds.map(lambda x: x["item_description"]).batch(256))

# Create category vectorizer (much smaller vocabulary)
category_vectorizer = TextVectorization(
    max_tokens=len(categories) + 10,  # Small vocabulary for categories
    output_sequence_length=1,  # Categories are single tokens
    name="category_vectorizer"
)
category_vectorizer.adapt(items_ds.map(lambda x: x["category"]).batch(256))

class EnhancedItemModel(tf.keras.Model):
    def __init__(self, description_vectorizer, category_vectorizer, embedding_dim=128):
        super().__init__()
        self.description_vectorizer = description_vectorizer
        self.category_vectorizer = category_vectorizer
        self.embedding_dim = embedding_dim

        # Description embedding branch
        self.description_embedding = tf.keras.Sequential([
            self.description_vectorizer,
            tf.keras.layers.Embedding(
                input_dim=self.description_vectorizer.vocabulary_size(),
                output_dim=embedding_dim,
                mask_zero=True,
                name="description_embedding"
            ),
            tf.keras.layers.GlobalAveragePooling1D(name="description_pooling"),
        ], name="description_branch")

        # Category embedding branch
        self.category_embedding = tf.keras.Sequential([
            self.category_vectorizer,
            tf.keras.layers.Embedding(
                input_dim=self.category_vectorizer.vocabulary_size(),
                output_dim=32,  # Smaller embedding for categories
                mask_zero=True,
                name="category_embedding"
            ),
            tf.keras.layers.GlobalAveragePooling1D(name="category_pooling"),
        ], name="category_branch")

        # Fusion layer to combine description and category embeddings
        self.fusion_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu", name="fusion_dense_1"),
            tf.keras.layers.BatchNormalization(name="fusion_bn_1"),
            tf.keras.layers.Dropout(0.3, name="fusion_dropout_1"),
            tf.keras.layers.Dense(128, activation="relu", name="fusion_dense_2"),
            tf.keras.layers.BatchNormalization(name="fusion_bn_2"),
            tf.keras.layers.Dropout(0.3, name="fusion_dropout_2"),
            tf.keras.layers.Dense(embedding_dim, name="fusion_output")
        ], name="fusion_layer")

    def call(self, inputs, training=None):
        # Process description
        description_emb = self.description_embedding(inputs["item_description"])

        # Process category
        category_emb = self.category_embedding(inputs["category"])

        # Concatenate description and category embeddings
        combined = tf.concat([description_emb, category_emb], axis=1)

        # Apply fusion layer
        output = self.fusion_dense(combined, training=training)

        return output

class SelfSupervisedItemTwoTower(tfrs.Model):
    def __init__(self, item_model):
        super().__init__()
        self.item_model = item_model
        self.task = tfrs.tasks.Retrieval()

    def compute_loss(self, features, training=False):
        item_embeddings = self.item_model(features, training=training)
        return self.task(query_embeddings=item_embeddings, candidate_embeddings=item_embeddings)

# Initialize the enhanced item tower
item_tower = EnhancedItemModel(description_vectorizer, category_vectorizer, embedding_dimension)
item_model_trainer = SelfSupervisedItemTwoTower(item_tower)
item_model_trainer.compile(optimizer=tf.keras.optimizers.Adam(0.001))

# Prepare training data with both description and category
train_item_ds = items_ds.map(lambda x: {
    "item_description": x["item_description"],
    "category": x["category"]
}).batch(512).cache()

print("Training Enhanced Item Tower with description and category...")
item_model_trainer.fit(train_item_ds, epochs=15, verbose=1)
print("Enhanced Item Tower training complete.")



[2] Building and training the enhanced Item Tower with category integration...
Training Enhanced Item Tower with description and category...
Epoch 1/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 84ms/step - loss: 4.6075 - regularization_loss: 0.0000e+00 - total_loss: 4.6075
Epoch 2/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 2.3443 - regularization_loss: 0.0000e+00 - total_loss: 2.3443
Epoch 3/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - loss: 1.2802 - regularization_loss: 0.0000e+00 - total_loss: 1.2802
Epoch 4/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 109ms/step - loss: 0.8636 - regularization_loss: 0.0000e+00 - total_loss: 0.8636
Epoch 5/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 71ms/step - loss: 1.1293 - regularization_loss: 0.0000e+00 - total_loss: 1.1293
Epoch 6/15
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/s

In [7]:
# --- Step 3: Generate and Store Item Embeddings in Faiss ---
print("\n[3] Generating item embeddings and storing in Faiss...")
index = faiss.IndexFlatL2(embedding_dimension)

# Generate embeddings using both description and category
item_embeddings_generator = items_ds.batch(512).map(lambda x: item_tower({
    "item_description": x["item_description"],
    "category": x["category"]
}))
all_item_embeddings = np.concatenate(list(item_embeddings_generator.as_numpy_iterator()))

# Normalize embeddings for better similarity search
all_item_embeddings = all_item_embeddings / np.linalg.norm(all_item_embeddings, axis=1, keepdims=True)

index.add(all_item_embeddings)
print(f"Faiss index now contains {index.ntotal} vectors.")
index_to_item_id = {i: item_id for i, item_id in enumerate(items_df["item_id"])}



[3] Generating item embeddings and storing in Faiss...
Faiss index now contains 10000 vectors.


In [8]:


# --- Step 4: Enhanced User Tower with Demographics ---
print("\n[4] Building and training the Enhanced User Tower with demographics...")
unique_user_ids = interactions_df["user_id"].unique()

# Create dataset from user demographics
users_ds = tf.data.Dataset.from_tensor_slices(dict(users_df))

# Create vectorizers for demographic features
age_group_vectorizer = TextVectorization(
    max_tokens=len(age_groups) + 5,
    output_sequence_length=1,
    name="age_group_vectorizer"
)
age_group_vectorizer.adapt(users_ds.map(lambda x: x["age_group"]).batch(256))

location_vectorizer = TextVectorization(
    max_tokens=len(locations) + 5,
    output_sequence_length=1,
    name="location_vectorizer"
)
location_vectorizer.adapt(users_ds.map(lambda x: x["location"]).batch(256))

gender_vectorizer = TextVectorization(
    max_tokens=len(genders) + 5,
    output_sequence_length=1,
    name="gender_vectorizer"
)
gender_vectorizer.adapt(users_ds.map(lambda x: x["gender"]).batch(256))

class EnhancedUserModel(tf.keras.Model):
    def __init__(self, user_ids, age_group_vectorizer, location_vectorizer, gender_vectorizer):
        super().__init__()
        self.age_group_vectorizer = age_group_vectorizer
        self.location_vectorizer = location_vectorizer
        self.gender_vectorizer = gender_vectorizer

        # User ID embedding
        self.user_id_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(user_ids) + 1, 64, name="user_id_emb")
        ], name="user_id_branch")

        # Age group embedding
        self.age_group_embedding = tf.keras.Sequential([
            self.age_group_vectorizer,
            tf.keras.layers.Embedding(
                input_dim=self.age_group_vectorizer.vocabulary_size(),
                output_dim=16,
                mask_zero=True,
                name="age_group_emb"
            ),
            tf.keras.layers.GlobalAveragePooling1D(),
        ], name="age_group_branch")

        # Location embedding
        self.location_embedding = tf.keras.Sequential([
            self.location_vectorizer,
            tf.keras.layers.Embedding(
                input_dim=self.location_vectorizer.vocabulary_size(),
                output_dim=32,
                mask_zero=True,
                name="location_emb"
            ),
            tf.keras.layers.GlobalAveragePooling1D(),
        ], name="location_branch")

        # Gender embedding
        self.gender_embedding = tf.keras.Sequential([
            self.gender_vectorizer,
            tf.keras.layers.Embedding(
                input_dim=self.gender_vectorizer.vocabulary_size(),
                output_dim=8,
                mask_zero=True,
                name="gender_emb"
            ),
            tf.keras.layers.GlobalAveragePooling1D(),
        ], name="gender_branch")

        # Fusion layers to combine all user features
        self.fusion_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu", name="user_fusion_dense_1"),
            tf.keras.layers.BatchNormalization(name="user_fusion_bn_1"),
            tf.keras.layers.Dropout(0.3, name="user_fusion_dropout_1"),
            tf.keras.layers.Dense(128, activation="relu", name="user_fusion_dense_2"),
            tf.keras.layers.BatchNormalization(name="user_fusion_bn_2"),
            tf.keras.layers.Dropout(0.3, name="user_fusion_dropout_2"),
            tf.keras.layers.Dense(embedding_dimension, name="user_fusion_output")
        ], name="user_fusion_layer")

    def call(self, inputs, training=None):
        # Process user ID
        user_id_emb = self.user_id_embedding(inputs["user_id"])

        # Process demographics
        age_group_emb = self.age_group_embedding(inputs["age_group"])
        location_emb = self.location_embedding(inputs["location"])
        gender_emb = self.gender_embedding(inputs["gender"])

        # Concatenate all user features
        combined = tf.concat([user_id_emb, age_group_emb, location_emb, gender_emb], axis=1)

        # Apply fusion layer
        output = self.fusion_dense(combined, training=training)

        return output

class EnhancedUserItemRetrievalModel(tfrs.Model):
    def __init__(self, user_model, item_model):
        super().__init__()
        self.user_model = user_model
        self.item_model = item_model
        self.item_model.trainable = False  # Keep item model frozen

        self.task = tfrs.tasks.Retrieval()

    def compute_loss(self, data, training=False):
        user_embeddings = self.user_model(data, training=training)

        # Get item embeddings for the interacted items
        item_data = {
            "item_description": data["item_description"],
            "category": data["category"]
        }
        item_embeddings = self.item_model(item_data, training=False)

        # Normalize embeddings
        user_embeddings = tf.nn.l2_normalize(user_embeddings, axis=1)
        item_embeddings = tf.nn.l2_normalize(item_embeddings, axis=1)

        return self.task(
            query_embeddings=user_embeddings,
            candidate_embeddings=item_embeddings
        )

# Prepare training data with user demographics and item details
interactions_with_details_df = pd.merge(
    interactions_df,
    items_df[['item_id', 'item_description', 'category']],
    on='item_id'
)

# Add user demographics to interactions
interactions_with_details_df = pd.merge(
    interactions_with_details_df,
    users_df[['user_id', 'age_group', 'location', 'gender']],
    on='user_id'
)

# Create negative sampling for better training
print("Creating training dataset with negative sampling...")
positive_interactions = interactions_with_details_df.copy()
positive_interactions['label'] = 1.0

# Create negative samples
negative_interactions = []
for user_id in unique_user_ids:
    user_positive_items = set(positive_interactions[positive_interactions['user_id'] == user_id]['item_id'])
    user_demo = users_df[users_df['user_id'] == user_id].iloc[0]

    # Sample negative items (items the user hasn't interacted with)
    num_negatives = min(len(user_positive_items), 20)  # Limit negatives to prevent memory issues
    all_items = set(items_df['item_id'])
    negative_items = list(all_items - user_positive_items)

    if len(negative_items) >= num_negatives:
        sampled_negatives = np.random.choice(negative_items, size=num_negatives, replace=False)

        for item_id in sampled_negatives:
            item_row = items_df[items_df['item_id'] == item_id].iloc[0]
            negative_interactions.append({
                'user_id': user_id,
                'item_id': item_id,
                'item_description': item_row['item_description'],
                'category': item_row['category'],
                'age_group': user_demo['age_group'],
                'location': user_demo['location'],
                'gender': user_demo['gender'],
                'label': 0.0
            })

negative_interactions_df = pd.DataFrame(negative_interactions)
full_training_df = pd.concat([positive_interactions, negative_interactions_df], ignore_index=True)

# Shuffle the training data
full_training_df = full_training_df.sample(frac=1).reset_index(drop=True)

full_interactions_ds = tf.data.Dataset.from_tensor_slices(dict(full_training_df))
train_ds_user = full_interactions_ds.shuffle(50_000).batch(512).cache()

user_tower = EnhancedUserModel(unique_user_ids, age_group_vectorizer, location_vectorizer, gender_vectorizer)
user_model_trainer = EnhancedUserItemRetrievalModel(user_tower, item_tower)
user_model_trainer.compile(optimizer=tf.keras.optimizers.Adam(0.001))

# Train the user model
print("Training Enhanced User Tower with demographics...")
user_model_trainer.fit(train_ds_user, epochs=15, verbose=1)
print("Enhanced User Tower training complete.")



[4] Building and training the Enhanced User Tower with demographics...
Creating training dataset with negative sampling...
Training Enhanced User Tower with demographics...
Epoch 1/15
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 56ms/step - loss: 3099.0576 - regularization_loss: 0.0000e+00 - total_loss: 3099.0576
Epoch 2/15
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 3094.3914 - regularization_loss: 0.0000e+00 - total_loss: 3094.3914
Epoch 3/15
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 3089.5247 - regularization_loss: 0.0000e+00 - total_loss: 3089.5247
Epoch 4/15
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 3086.2178 - regularization_loss: 0.0000e+00 - total_loss: 3086.2178
Epoch 5/15
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 59ms/step - loss: 3084.3528 - regularization_loss: 0.0000e+00 - total_loss: 3084.3528
Epoch 6/15
[1m49/49

In [9]:

# --- Step 5: Enhanced Recommendation Function with Demographics ---
print("\n[5] Implementing enhanced recommendation functions with demographics...")

def get_demographic_recommendations(user_id, top_k=15):
    """Get recommendations using enhanced user model with demographics"""
    print(f"\n--- Enhanced demographic recommendations for user '{user_id}' ---")

    if user_id not in unique_user_ids:
        print(f"User '{user_id}' is a new user (cold start).")
        return None

    # Get user demographics and preferences
    user_demo = users_df[users_df['user_id'] == user_id].iloc[0]
    preferred_categories = user_categories[user_id]

    print(f"User demographics: {user_demo['age_group']}, {user_demo['gender']}, {user_demo['location']}")
    print(f"Preferred categories ({len(preferred_categories)}): {list(preferred_categories)}")

    # Get enhanced user embedding with demographics
    user_input = {
        "user_id": tf.constant([user_id]),
        "age_group": tf.constant([user_demo['age_group']]),
        "location": tf.constant([user_demo['location']]),
        "gender": tf.constant([user_demo['gender']])
    }

    user_embedding = user_tower(user_input).numpy()
    user_embedding = user_embedding / np.linalg.norm(user_embedding, axis=1, keepdims=True)

    # Get items user has already interacted with
    user_interacted_items = set(interactions_df[interactions_df['user_id'] == user_id]['item_id'])

    # Search for similar items using enhanced embeddings
    search_k = min(top_k * 3, index.ntotal)
    distances, indices = index.search(user_embedding, search_k)

    recommendations = []
    for idx in indices[0]:
        item_id = index_to_item_id[idx]
        if item_id in user_interacted_items:
            continue

        item_row = items_df[items_df['item_id'] == item_id].iloc[0]
        recommendations.append({
            'item_id': item_id,
            'category': item_row['category'],
            'title': item_row['item_title']
        })

        if len(recommendations) >= top_k:
            break

    # Calculate accuracy
    category_counts = defaultdict(int)
    correct_recommendations = 0

    for rec in recommendations:
        category_counts[rec['category']] += 1
        if rec['category'] in preferred_categories:
            correct_recommendations += 1

    accuracy = correct_recommendations / len(recommendations) if recommendations else 0

    print(f"Generated {len(recommendations)} recommendations")
    print(f"Category distribution:")
    for category in sorted(category_counts.keys()):
        count = category_counts[category]
        percentage = (count / len(recommendations)) * 100 if recommendations else 0
        is_preferred = "✓" if category in preferred_categories else "✗"
        print(f"  {is_preferred} {category}: {count} items ({percentage:.1f}%)")

    print(f"Accuracy: {correct_recommendations}/{len(recommendations)} ({accuracy:.1%})")

    return accuracy



[5] Implementing enhanced recommendation functions with demographics...


In [10]:

def get_balanced_recommendations(user_id, top_k=15):
    """Get recommendations with explicit category balancing"""
    print(f"\n--- Balanced recommendations for user '{user_id}' ---")

    if user_id not in unique_user_ids:
        print(f"User '{user_id}' is a new user (cold start).")
        return handle_cold_start_user(user_id, top_k)

    # Get user demographics and preferences
    user_demo = users_df[users_df['user_id'] == user_id].iloc[0]
    preferred_categories = user_categories[user_id]

    print(f"User demographics: {user_demo['age_group']}, {user_demo['gender']}, {user_demo['location']}")
    print(f"Preferred categories ({len(preferred_categories)}): {list(preferred_categories)}")

    # Get enhanced user embedding with demographics
    user_input = {
        "user_id": tf.constant([user_id]),
        "age_group": tf.constant([user_demo['age_group']]),
        "location": tf.constant([user_demo['location']]),
        "gender": tf.constant([user_demo['gender']])
    }

    user_embedding = user_tower(user_input).numpy()
    user_embedding = user_embedding / np.linalg.norm(user_embedding, axis=1, keepdims=True)

    # Get items user has already interacted with
    user_interacted_items = set(interactions_df[interactions_df['user_id'] == user_id]['item_id'])

    # Balanced recommendation approach
    recommendations = []
    category_counts = defaultdict(int)

    # Calculate items per preferred category
    items_per_category = max(1, top_k // len(preferred_categories))
    remaining_slots = top_k - (items_per_category * len(preferred_categories))

    print(f"Target: {items_per_category} items per category, {remaining_slots} flexible slots")

    # For each preferred category, find the best items
    for category in preferred_categories:
        category_items = category_to_items[category]
        category_item_embeddings = []
        category_item_ids = []

        # Get embeddings for items in this category
        for item_id in category_items:
            if item_id not in user_interacted_items:
                idx = int(item_id)
                category_item_embeddings.append(all_item_embeddings[idx])
                category_item_ids.append(item_id)

        if not category_item_embeddings:
            continue

        # Calculate similarities to user embedding
        category_item_embeddings = np.array(category_item_embeddings)
        similarities = np.dot(category_item_embeddings, user_embedding.T).flatten()

        # Get top items for this category
        top_indices = np.argsort(similarities)[::-1][:items_per_category]

        for idx in top_indices:
            item_id = category_item_ids[idx]
            item_row = items_df[items_df['item_id'] == item_id].iloc[0]
            recommendations.append({
                'item_id': item_id,
                'category': item_row['category'],
                'title': item_row['item_title'],
                'similarity': similarities[idx]
            })
            category_counts[category] += 1

    # Fill remaining slots with best overall recommendations
    if remaining_slots > 0:
        search_k = min(top_k * 3, index.ntotal)
        distances, indices = index.search(user_embedding, search_k)

        added_items = set(rec['item_id'] for rec in recommendations)

        for idx in indices[0]:
            if remaining_slots <= 0:
                break

            item_id = index_to_item_id[idx]
            if item_id in user_interacted_items or item_id in added_items:
                continue

            item_row = items_df[items_df['item_id'] == item_id].iloc[0]
            recommendations.append({
                'item_id': item_id,
                'category': item_row['category'],
                'title': item_row['item_title'],
                'similarity': 1.0 - distances[0][list(indices[0]).index(idx)]  # Convert distance to similarity
            })
            category_counts[item_row['category']] += 1
            added_items.add(item_id)
            remaining_slots -= 1

    # Calculate accuracy
    correct_recommendations = sum(1 for rec in recommendations if rec['category'] in preferred_categories)
    accuracy = correct_recommendations / len(recommendations) if recommendations else 0

    print(f"Generated {len(recommendations)} recommendations")
    print(f"Category distribution:")
    for category in sorted(category_counts.keys()):
        count = category_counts[category]
        percentage = (count / len(recommendations)) * 100 if recommendations else 0
        is_preferred = "✓" if category in preferred_categories else "✗"
        print(f"  {is_preferred} {category}: {count} items ({percentage:.1f}%)")

    print(f"Accuracy: {correct_recommendations}/{len(recommendations)} ({accuracy:.1%})")

    return accuracy


In [11]:

def handle_cold_start_user(user_id, top_k=15):
    """Handle recommendations for new users without interaction history"""
    print(f"Handling cold start for user '{user_id}'")

    # For cold start, use demographic-based recommendations
    # This is a simplified approach - in practice, you might use popularity-based or content-based filtering

    # Sample popular items from each category
    popular_items = []
    items_per_category = max(1, top_k // len(categories))

    for category in categories[:min(len(categories), top_k)]:
        category_items = category_to_items[category]
        if category_items:
            # For simplicity, take the first few items from each category
            # In practice, you'd want to use popularity metrics
            sample_size = min(items_per_category, len(category_items))
            sampled_items = np.random.choice(category_items, size=sample_size, replace=False)

            for item_id in sampled_items:
                item_row = items_df[items_df['item_id'] == item_id].iloc[0]
                popular_items.append({
                    'item_id': item_id,
                    'category': item_row['category'],
                    'title': item_row['item_title']
                })

    # Shuffle and limit to top_k
    np.random.shuffle(popular_items)
    popular_items = popular_items[:top_k]

    print(f"Generated {len(popular_items)} cold start recommendations")
    category_counts = defaultdict(int)
    for item in popular_items:
        category_counts[item['category']] += 1

    print("Category distribution:")
    for category in sorted(category_counts.keys()):
        count = category_counts[category]
        percentage = (count / len(popular_items)) * 100 if popular_items else 0
        print(f"  {category}: {count} items ({percentage:.1f}%)")

    return 0.0  # Cold start accuracy is 0 since we don't know user preferences



In [12]:
def get_category_aware_recommendations(user_id, top_k=15, category_diversity_weight=0.3):
    """Get recommendations with category diversity weighting - Fixed infinite loop issue"""
    print(f"\n--- Category-aware recommendations for user '{user_id}' ---")

    if user_id not in unique_user_ids:
        print(f"User '{user_id}' is a new user (cold start).")
        return handle_cold_start_user(user_id, top_k)

    # Get user demographics and preferences
    user_demo = users_df[users_df['user_id'] == user_id].iloc[0]
    preferred_categories = user_categories[user_id]

    # Convert numpy strings to regular strings to avoid comparison issues
    preferred_categories = set(str(cat) for cat in preferred_categories)

    print(f"User demographics: {user_demo['age_group']}, {user_demo['gender']}, {user_demo['location']}")
    print(f"Preferred categories ({len(preferred_categories)}): {list(preferred_categories)}")

    # Get enhanced user embedding with demographics
    user_input = {
        "user_id": tf.constant([user_id]),
        "age_group": tf.constant([user_demo['age_group']]),
        "location": tf.constant([user_demo['location']]),
        "gender": tf.constant([user_demo['gender']])
    }

    try:
        user_embedding = user_tower(user_input).numpy()
        user_embedding = user_embedding / np.linalg.norm(user_embedding, axis=1, keepdims=True)
    except Exception as e:
        print(f"Error getting user embedding: {e}")
        return 0.0

    # Get items user has already interacted with
    user_interacted_items = set(interactions_df[interactions_df['user_id'] == user_id]['item_id'])

    # Get more candidates than needed for diversity selection
    search_k = min(top_k * 5, index.ntotal)
    distances, indices = index.search(user_embedding, search_k)

    # Build candidate pool with similarity scores
    candidates = []
    for i, idx in enumerate(indices[0]):
        item_id = index_to_item_id[idx]
        if item_id in user_interacted_items:
            continue

        item_row = items_df[items_df['item_id'] == item_id].iloc[0]
        similarity = 1.0 - distances[0][i]  # Convert distance to similarity

        candidates.append({
            'item_id': item_id,
            'category': str(item_row['category']),  # Convert to string
            'title': item_row['item_title'],
            'similarity': similarity,
            'is_preferred': str(item_row['category']) in preferred_categories
        })

    print(f"Found {len(candidates)} candidates after filtering")

    if not candidates:
        print("No candidates found!")
        return 0.0

    # FIXED: Use a simpler, guaranteed-to-terminate selection algorithm
    recommendations = []
    category_counts = defaultdict(int)

    # Sort candidates by adjusted score first
    for candidate in candidates:
        score = candidate['similarity']
        if candidate['is_preferred']:
            score *= 1.5
        candidate['adjusted_score'] = score

    # Sort by adjusted score (highest first)
    candidates.sort(key=lambda x: x['adjusted_score'], reverse=True)

    # Select with diversity constraints
    for candidate in candidates:
        if len(recommendations) >= top_k:
            break

        # Apply diversity penalty
        category_penalty = category_counts[candidate['category']] * category_diversity_weight
        final_score = candidate['adjusted_score'] - category_penalty

        # Accept if it's still a good candidate or if we need more recommendations
        if final_score > 0 or len(recommendations) < top_k // 2:
            recommendations.append(candidate)
            category_counts[candidate['category']] += 1

    # If we still don't have enough, fill with remaining candidates
    remaining_candidates = [c for c in candidates if c not in recommendations]
    for candidate in remaining_candidates:
        if len(recommendations) >= top_k:
            break
        recommendations.append(candidate)
        category_counts[candidate['category']] += 1

    # Calculate accuracy
    correct_recommendations = sum(1 for rec in recommendations if rec['is_preferred'])
    accuracy = correct_recommendations / len(recommendations) if recommendations else 0

    print(f"Generated {len(recommendations)} recommendations")
    print(f"Category distribution:")
    for category in sorted(category_counts.keys()):
        count = category_counts[category]
        percentage = (count / len(recommendations)) * 100 if recommendations else 0
        is_preferred = "✓" if category in preferred_categories else "✗"
        print(f"  {is_preferred} {category}: {count} items ({percentage:.1f}%)")

    print(f"Accuracy: {correct_recommendations}/{len(recommendations)} ({accuracy:.1%})")

    return accuracy

In [13]:

# --- Step 6: Comprehensive Evaluation ---
print("\n[6] Comprehensive evaluation of recommendation approaches...")

def evaluate_recommendation_approaches():
    """Evaluate different recommendation approaches"""
    print("\n=== COMPREHENSIVE RECOMMENDATION EVALUATION ===")

    # Select diverse test users
    test_users = np.random.choice(unique_user_ids, size=min(10, len(unique_user_ids)), replace=False)

    results = {
        'demographic': [],
        'balanced': [],
        'category_aware': []
    }

    for user_id in test_users:
        print(f"\n{'='*60}")
        print(f"EVALUATING USER {user_id}")
        print(f"{'='*60}")

        # Test demographic recommendations
        try:
            accuracy_demo = get_demographic_recommendations(user_id, top_k=10)
            results['demographic'].append(accuracy_demo)
        except Exception as e:
            print(f"Error in demographic recommendations: {e}")
            results['demographic'].append(0.0)

        # Test balanced recommendations
        try:
            accuracy_balanced = get_balanced_recommendations(user_id, top_k=10)
            results['balanced'].append(accuracy_balanced)
        except Exception as e:
            print(f"Error in balanced recommendations: {e}")
            results['balanced'].append(0.0)

        # Test category-aware recommendations
        try:
            accuracy_category = get_category_aware_recommendations(user_id, top_k=10)
            results['category_aware'].append(accuracy_category)
        except Exception as e:
            print(f"Error in category-aware recommendations: {e}")
            results['category_aware'].append(0.0)

    # Calculate and display overall results
    print(f"\n{'='*60}")
    print("OVERALL EVALUATION RESULTS")
    print(f"{'='*60}")

    for approach, accuracies in results.items():
        valid_accuracies = [acc for acc in accuracies if acc is not None]
        if valid_accuracies:
            avg_accuracy = np.mean(valid_accuracies)
            std_accuracy = np.std(valid_accuracies)
            print(f"{approach.replace('_', ' ').title()} Approach:")
            print(f"  Average Accuracy: {avg_accuracy:.1%} (±{std_accuracy:.1%})")
            print(f"  Valid Evaluations: {len(valid_accuracies)}/{len(accuracies)}")
        else:
            print(f"{approach.replace('_', ' ').title()} Approach: No valid results")

    return results



[6] Comprehensive evaluation of recommendation approaches...


In [14]:

# --- Step 7: Interactive Recommendation Interface ---
def interactive_recommendation_demo():
    """Interactive demo of the recommendation system"""
    print("\n=== INTERACTIVE RECOMMENDATION DEMO ===")

    while True:
        print("\nChoose an option:")
        print("1. Get recommendations for a specific user")
        print("2. Get recommendations for a random user")
        print("3. Compare all approaches for a user")
        print("4. Show user demographics and preferences")
        print("5. Exit")

        choice = input("\nEnter your choice (1-5): ").strip()

        if choice == '1':
            user_id = input("Enter user ID: ").strip()
            if user_id in unique_user_ids:
                approach = input("Choose approach (demographic/balanced/category_aware): ").strip().lower()
                if approach == 'demographic':
                    get_demographic_recommendations(user_id)
                elif approach == 'balanced':
                    get_balanced_recommendations(user_id)
                elif approach == 'category_aware':
                    get_category_aware_recommendations(user_id)
                else:
                    print("Invalid approach. Using demographic approach.")
                    get_demographic_recommendations(user_id)
            else:
                print(f"User {user_id} not found. Available users: {list(unique_user_ids)[:10]}...")

        elif choice == '2':
            user_id = np.random.choice(unique_user_ids)
            print(f"Selected random user: {user_id}")
            get_demographic_recommendations(user_id)

        elif choice == '3':
            user_id = input("Enter user ID: ").strip()
            if user_id in unique_user_ids:
                print(f"Comparing all approaches for user {user_id}:")
                get_demographic_recommendations(user_id)
                get_balanced_recommendations(user_id)
                get_category_aware_recommendations(user_id)
            else:
                print(f"User {user_id} not found.")

        elif choice == '4':
            user_id = input("Enter user ID: ").strip()
            if user_id in unique_user_ids:
                user_demo = users_df[users_df['user_id'] == user_id].iloc[0]
                preferred_categories = user_categories[user_id]
                user_interactions_count = len(interactions_df[interactions_df['user_id'] == user_id])

                print(f"\nUser {user_id} Profile:")
                print(f"  Demographics: {user_demo['age_group']}, {user_demo['gender']}, {user_demo['location']}")
                print(f"  Preferred Categories: {list(preferred_categories)}")
                print("breaking")
                print(f"  Total Interactions: {user_interactions_count}")

                # Show actual interactions
                user_interactions = interactions_df[interactions_df['user_id'] == user_id]
                interaction_details = pd.merge(user_interactions, items_df, on='item_id')
                category_breakdown = interaction_details['category'].value_counts()

                print("  Interaction Breakdown:")
                for category, count in category_breakdown.items():
                    print(f"    {category}: {count} interactions")
            else:
                print(f"User {user_id} not found.")

        elif choice == '5':
            print("Goodbye!")
            break

        else:
            print("Invalid choice. Please try again.")

# Run the evaluation
evaluation_results = evaluate_recommendation_approaches()

# Start interactive demo
print("\n" + "="*60)
print("RECOMMENDATION SYSTEM READY")
print("="*60)
print("System trained and evaluated successfully!")
print("You can now use the interactive demo to explore recommendations.")

# Uncomment the line below to start the interactive demo
# interactive_recommendation_demo()

print("\n=== SYSTEM SUMMARY ===")
print(f"✓ Enhanced Item Tower: Trained with {len(items_df)} items across {len(categories)} categories")
print(f"✓ Enhanced User Tower: Trained with {len(users_df)} users with demographic features")
print(f"✓ Faiss Index: Contains {index.ntotal} normalized item embeddings")
print(f"✓ Training Data: {len(interactions_df)} user-item interactions")
print(f"✓ Evaluation: Completed on {len(evaluation_results['demographic'])} test users")
print("\nRecommendation approaches implemented:")
print("  1. Demographic-based recommendations")
print("  2. Balanced category recommendations")
print("  3. Category-aware diversity recommendations")
print("\nTo start the interactive demo, call: interactive_recommendation_demo()")


=== COMPREHENSIVE RECOMMENDATION EVALUATION ===

EVALUATING USER 205

--- Enhanced demographic recommendations for user '205' ---
User demographics: 65+, Male, Austin
Preferred categories (3): [np.str_('movies_tv'), np.str_('automotive'), np.str_('grocery')]
Generated 10 recommendations
Category distribution:
  ✓ automotive: 10 items (100.0%)
Accuracy: 10/10 (100.0%)

--- Balanced recommendations for user '205' ---
User demographics: 65+, Male, Austin
Preferred categories (3): [np.str_('movies_tv'), np.str_('automotive'), np.str_('grocery')]
Target: 3 items per category, 1 flexible slots
Generated 10 recommendations
Category distribution:
  ✓ automotive: 4 items (40.0%)
  ✓ grocery: 3 items (30.0%)
  ✓ movies_tv: 3 items (30.0%)
Accuracy: 10/10 (100.0%)

--- Category-aware recommendations for user '205' ---
User demographics: 65+, Male, Austin
Preferred categories (3): ['automotive', 'movies_tv', 'grocery']
Found 50 candidates after filtering
Generated 10 recommendations
Category dist