In [1]:
import pandas as pd
import numpy as np
import ast
import random
import tensorflow as tf
from tensorflow import keras
from keras import layers, models
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import KFold
from gensim.models import Word2Vec

# Download dataset (original code kept for compatibility)
try:
    import kagglehub
    # Download latest version if not already downloaded
    path = kagglehub.dataset_download("wilmerarltstrmberg/recipe-dataset-over-2m")
    print("Path to dataset files:", path)
except Exception as download_error:
    # Best-effort download: on any ordinary failure (missing package, no
    # network, ...) fall back to the local CSV. Catching Exception rather than
    # using a bare `except:` lets KeyboardInterrupt/SystemExit propagate.
    print("Could not download dataset with kagglehub. Using existing files.")
    print(f"Download error: {download_error!r}")

# Load and preprocess data
df = pd.read_csv("recipes_data.csv")

# Print the columns to see what's available
print("Available columns:", df.columns.tolist())

# Drop incomplete rows, then renumber so that row labels equal row positions.
# Later cells sample labels from df.index and also use them to index
# positional NumPy arrays built from df, so the two must stay aligned.
df = df.dropna().reset_index(drop=True)

# Only drop columns that exist
columns_to_drop = [col for col in ['source', 'link', 'NER', 'site'] if col in df.columns]

if columns_to_drop:
    df = df.drop(columns=columns_to_drop)

print(f"Dataset shape: {df.shape}")
df.head()


  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /Users/vincentma/.cache/kagglehub/datasets/wilmerarltstrmberg/recipe-dataset-over-2m/versions/2
Available columns: ['title', 'ingredients', 'directions']
Dataset shape: (2231141, 3)


Unnamed: 0,title,ingredients,directions
0,No-Bake Nut Cookies,"['1 c. firmly packed brown sugar', '1/2 c. eva...","['In a heavy 2-quart saucepan, mix brown sugar..."
1,Jewell Ball'S Chicken,"['1 small jar chipped beef, cut up', '4 boned ...",['Place chipped beef on bottom of baking dish....
2,Creamy Corn,"['2 (16 oz.) pkg. frozen corn', '1 (8 oz.) pkg...","['In a slow cooker, combine all ingredients. C..."
3,Chicken Funny,"['1 large whole chicken', '2 (10 1/2 oz.) cans...","['Boil and debone chicken.', 'Put bite size pi..."
4,Reeses Cups(Candy),"['1 c. peanut butter', '3/4 c. graham cracker ...",['Combine first four ingredients and press in ...


In [2]:

# Convert ingredient and directions columns from string to list
def _parse_list_literal(value):
    """Parse a stringified Python literal; return non-string values unchanged.

    Passing already-parsed values through keeps this cell safe to re-run,
    while a malformed string still raises instead of being silently ignored.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value

for _column in ('ingredients', 'directions'):
    df[_column] = df[_column].apply(_parse_list_literal)

# Check the format of the 'ingredients' column.
# Positional access: label 0 is not guaranteed to exist after rows were dropped.
first_ingredients = df['ingredients'].iloc[0]
print("Sample ingredients type:", type(first_ingredients))
print("Sample ingredients:", first_ingredients[:3])  # Show first 3 ingredients

# IMPROVEMENT 1: Enhanced Data Representation
# Preprocess ingredients to strings for TF-IDF
def preprocess_ingredients(ingredients_list):
    """Return all ingredient strings lower-cased and joined by single spaces."""
    lowered = (item.lower() for item in ingredients_list)
    return " ".join(lowered)

# One lower-cased text document per recipe, used as TF-IDF input
df['ingredients_text'] = df['ingredients'].map(preprocess_ingredients)

# Create TF-IDF representations
print("Creating TF-IDF representations...")
tfidf = TfidfVectorizer(max_features=100)  # Reduced feature size for efficiency
# Dense (n_recipes, n_features) matrix; rows follow the row order of df
ingredient_features = tfidf.fit_transform(df['ingredients_text']).toarray()
print(f"TF-IDF feature shape: {ingredient_features.shape}")

# Create a corpus of ingredient words: one token list per ingredient line
ingredient_corpus = []
for recipe_ingredients in df['ingredients']:
    ingredient_corpus.extend(ingredient.split() for ingredient in recipe_ingredients)

# Train Word2Vec
print("Training Word2Vec model...")
ingredient_model = Word2Vec(
    sentences=ingredient_corpus,
    vector_size=50,
    window=5,
    min_count=1,
    workers=4,
)

# Average ingredient embeddings for each recipe
def get_recipe_embedding(ingredients):
    """Average the Word2Vec vectors of every known word in a recipe.

    Args:
        ingredients: Iterable of ingredient strings; each one is split on
            whitespace and every token is looked up in ``ingredient_model.wv``.

    Returns:
        A 1-D array holding the mean vector of all tokens found in the model,
        or a zero vector of the model's vector size when no token is known.
    """
    word_vectors = ingredient_model.wv
    vectors = [
        word_vectors[word]
        for ing in ingredients
        for word in ing.split()
        if word in word_vectors
    ]
    if vectors:
        return np.mean(vectors, axis=0)
    # Size the fallback from the model instead of hard-coding 50, so the
    # fallback cannot drift from the `vector_size` used to train the model.
    return np.zeros(word_vectors.vector_size)

print("Generating word embeddings...")
df['embedding'] = df['ingredients'].apply(get_recipe_embedding)

# IMPROVEMENT 2: Add Nutritional and Category Features
print("Extracting nutritional features...")
def extract_nutritional_features(ingredients):
    """Count keyword hits per food group in a recipe's ingredient list.

    The ingredient strings are joined and lower-cased, then each keyword is
    tested as a plain substring (so e.g. 'oat' also matches 'goat').

    Args:
        ingredients: Iterable of ingredient strings.

    Returns:
        np.ndarray of four integers: the number of distinct protein, carb,
        vegetable and dairy keywords found, in that order.
    """
    # Simple heuristics for demonstration; one keyword list per food group,
    # ordered protein, carbs, vegetables, dairy.
    keyword_groups = (
        ['chicken', 'beef', 'fish', 'tofu', 'beans', 'lentils', 'eggs', 'pork', 'turkey', 'meat', 'lamb'],
        ['rice', 'pasta', 'bread', 'potato', 'flour', 'sugar', 'corn', 'oat', 'cereal', 'noodle'],
        ['spinach', 'broccoli', 'carrot', 'tomato', 'onion', 'celery', 'lettuce', 'cucumber', 'pepper', 'zucchini'],
        ['milk', 'cheese', 'cream', 'yogurt', 'butter', 'cheddar', 'mozzarella', 'parmesan'],
    )

    haystack = ' '.join(ingredients).lower()
    group_scores = [
        sum(1 for keyword in group if keyword in haystack)
        for group in keyword_groups
    ]
    return np.array(group_scores)

# One 4-element food-group count array per recipe
df['nutritional_features'] = df['ingredients'].map(extract_nutritional_features)


Sample ingredients type: <class 'list'>
Sample ingredients: ['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla']
Creating TF-IDF representations...
TF-IDF feature shape: (2231141, 100)
Training Word2Vec model...
Generating word embeddings...
Extracting nutritional features...


In [3]:
# Extract cooking method features
print("Extracting cooking method features...")
def extract_cooking_features(directions):
    """Count cooking-method keyword hits in a recipe's directions.

    The direction strings are joined and lower-cased, then each keyword is
    tested as a plain substring (so e.g. 'pan' also matches 'pancakes').

    Args:
        directions: Iterable of direction strings.

    Returns:
        np.ndarray of four integers: the number of distinct baking, frying,
        boiling and grilling keywords found, in that order.
    """
    # One keyword list per method, ordered baking, frying, boiling, grilling.
    method_keywords = (
        ['bake', 'oven', 'roast', 'broil'],
        ['fry', 'sauté', 'pan', 'skillet'],
        ['boil', 'simmer', 'poach'],
        ['grill', 'barbecue', 'bbq'],
    )

    haystack = ' '.join(directions).lower()
    method_scores = [
        sum(1 for keyword in keywords if keyword in haystack)
        for keywords in method_keywords
    ]
    return np.array(method_scores)

# One 4-element cooking-method count array per recipe
df['cooking_features'] = df['directions'].map(extract_cooking_features)

# Extract complexity based on number of ingredients and steps
print("Calculating recipe complexity...")
df['ingredient_count'] = df['ingredients'].map(len)
df['direction_count'] = df['directions'].map(len)

# Each count is divided by its column maximum, the two ratios are averaged,
# and the result is multiplied by 10.
max_ingredient_count = df['ingredient_count'].max()
max_direction_count = df['direction_count'].max()
df['complexity'] = (df['ingredient_count'] / max_ingredient_count +
                    df['direction_count'] / max_direction_count) / 2 * 10  # Scale to 0-10

# Create meal plans (synthetic data)
print("Creating synthetic meal plans...")
def create_meal_plan(num_recipes=3):
    """Pick `num_recipes` distinct recipes at random.

    Args:
        num_recipes: Number of recipes in the plan (default 3).

    Returns:
        A list of `num_recipes` labels drawn without replacement from
        ``df.index``.

    Raises:
        ValueError: If `num_recipes` exceeds the number of rows in ``df``
            (raised by ``random.sample``).
    """
    # Sample row positions from a range instead of copying the whole index
    # into a list on every call; the labels are looked up only for the
    # sampled positions.
    positions = random.sample(range(len(df)), num_recipes)
    return df.index[positions].tolist()

# Create 1000 synthetic meal plans (increased from original 500)
meal_plans = [create_meal_plan() for _ in range(1000)]

# IMPROVEMENT 4: Implement a More Meaningful Target Variable
print("Calculating meaningful meal plan scores...")
def calculate_meal_balance_score(recipe_indices):
    """Score a meal plan from nutrition, cooking variety, ingredient variety and complexity.

    Args:
        recipe_indices: Labels of the plan's recipes in ``df``.

    Returns:
        The sum of the four component scores, clamped to the range 1..10.
    """
    def column_values(column):
        # Per-recipe values of one df column, in plan order.
        return [df.loc[idx, column] for idx in recipe_indices]

    # Nutritional balance: penalise distance from the targets
    # (2 protein, 2 carbs, 3 vegetables, 1 dairy keyword hits).
    protein, carbs, veg, dairy = np.sum(column_values('nutritional_features'), axis=0)
    balance = 10 - abs(protein - 2) - abs(carbs - 2) - abs(veg - 3) - abs(dairy - 1)

    # Cooking variety: number of methods used anywhere in the plan, capped at 3.
    method_totals = np.sum(column_values('cooking_features'), axis=0)
    methods_used = sum(1 for total in method_totals if total > 0)
    cooking_variety = min(methods_used, 3)

    # Ingredient variety: share of unique ingredient strings, worth up to 5.
    all_ingredients = []
    for recipe_ingredients in column_values('ingredients'):
        all_ingredients.extend(recipe_ingredients)
    if all_ingredients:
        unique_ratio = len(set(all_ingredients)) / len(all_ingredients)
    else:
        unique_ratio = 0
    variety_score = unique_ratio * 5

    # Complexity balance: 3 points minus the complexity spread (capped at 3).
    complexities = column_values('complexity')
    spread = abs(max(complexities) - min(complexities))
    complexity_balance = 3 - min(spread, 3)

    final_score = balance + variety_score + cooking_variety + complexity_balance
    # Clamp to a 1-10 scale
    capped = min(10, final_score)
    return max(1, capped)

# Create more meaningful training data
y_meaningful = np.array(list(map(calculate_meal_balance_score, meal_plans)))
print(f"Target score range: {y_meaningful.min():.2f} - {y_meaningful.max():.2f}")

# IMPROVEMENT 6: Implement User Preferences
print("Simulating user preferences...")
# Simulate user preferences (in a real system, these would come from user data)
user_preferences = dict(
    vegetarian=False,
    spice_level='medium',  # low, medium, high
    favorite_ingredients=['chicken', 'garlic', 'olive oil'],
    disliked_ingredients=['cilantro', 'mushrooms'],
)

Extracting cooking method features...
Calculating recipe complexity...
Creating synthetic meal plans...
Calculating meaningful meal plan scores...
Target score range: 3.80 - 10.00
Simulating user preferences...


In [4]:
# Function to score recipes based on user preferences
def user_preference_score(recipe_idx, preferences):
    """Score one recipe against a user's preferences.

    Starts from 5.0, subtracts 3.0 if the user is vegetarian and a meat
    keyword appears, adds 0.5 per favorite ingredient present and subtracts
    1.0 per disliked ingredient present. Matching is substring-based on the
    lower-cased, space-joined ingredient text.

    Args:
        recipe_idx: Label of the recipe in ``df``.
        preferences: Dict with keys 'vegetarian', 'favorite_ingredients' and
            'disliked_ingredients'.

    Returns:
        The preference score as a float.
    """
    recipe_text = ' '.join(df.loc[recipe_idx, 'ingredients']).lower()
    meat_keywords = ('chicken', 'beef', 'pork', 'fish', 'meat', 'turkey')

    score = 5.0

    # Vegetarian users are penalised once for any meat keyword.
    if preferences['vegetarian'] and any(meat in recipe_text for meat in meat_keywords):
        score -= 3.0

    # Reward each favorite ingredient that appears.
    for liked in preferences['favorite_ingredients']:
        if liked in recipe_text:
            score += 0.5

    # Penalise each disliked ingredient that appears.
    for disliked in preferences['disliked_ingredients']:
        if disliked in recipe_text:
            score -= 1.0

    return score

# Adjust meal plan scores based on user preferences
def adjust_meal_plan_score(plan_score, recipe_indices, user_prefs):
    """Add the mean per-recipe preference score to a plan's base score.

    Args:
        plan_score: Base score of the meal plan.
        recipe_indices: Labels of the plan's recipes.
        user_prefs: Preference dict passed to ``user_preference_score``.

    Returns:
        ``plan_score`` plus the average of ``user_preference_score`` over the
        recipes. An empty plan has no preference contribution, so
        ``plan_score`` is returned unchanged.
    """
    # Materialise once so the length is known even for iterators.
    recipe_indices = list(recipe_indices)
    if not recipe_indices:
        # Avoid dividing by zero when the plan contains no recipes.
        return plan_score
    pref_score = sum(user_preference_score(idx, user_prefs) for idx in recipe_indices)
    return plan_score + (pref_score / len(recipe_indices))

# Apply user preferences to our target scores
# Pair each base score with its plan and add the preference adjustment
y_with_preferences = np.array([
    adjust_meal_plan_score(base_score, plan, user_preferences)
    for base_score, plan in zip(y_meaningful, meal_plans)
])

print(f"Target scores after preferences: {y_with_preferences.min():.2f} - {y_with_preferences.max():.2f}")

# Feature extraction for meal plans
print("Preparing features for model training...")
def get_plan_features(plan):
    """Aggregate per-recipe features into one feature dict for a meal plan.

    Args:
        plan: Labels (from ``df.index``) of the recipes in the plan.

    Returns:
        Dict with keys 'embedding_input' (mean embedding), 'tfidf_input'
        (mean TF-IDF row), 'nutrition_input' (summed nutrition counts),
        'cooking_input' (summed cooking counts) and 'complexity_input'
        ([mean complexity, complexity range]).

    Raises:
        KeyError: If a label in `plan` is not present in ``df.index``.
    """
    # Extract Word2Vec embeddings
    embeddings = np.array([df.loc[idx, 'embedding'] for idx in plan])
    emb_avg = np.mean(embeddings, axis=0)

    # Extract TF-IDF features.
    # `ingredient_features` is a plain NumPy array addressed by row position,
    # while `plan` holds index labels, so translate labels to positions first.
    # NOTE: assumes df.index is unique, so get_loc returns a single integer.
    row_positions = [df.index.get_loc(idx) for idx in plan]
    tfidf_features = ingredient_features[row_positions]
    tfidf_avg = np.mean(tfidf_features, axis=0)

    # Extract nutritional features
    nutrition = np.array([df.loc[idx, 'nutritional_features'] for idx in plan])
    nutrition_sum = np.sum(nutrition, axis=0)

    # Extract cooking features
    cooking = np.array([df.loc[idx, 'cooking_features'] for idx in plan])
    cooking_sum = np.sum(cooking, axis=0)

    # Complexity metrics
    complexities = np.array([df.loc[idx, 'complexity'] for idx in plan])
    complexity_features = np.array([
        np.mean(complexities),
        np.max(complexities) - np.min(complexities)  # Range of complexity
    ])

    # Keys match the input-layer names expected by the model
    return {
        'embedding_input': emb_avg,
        'tfidf_input': tfidf_avg,
        'nutrition_input': nutrition_sum,
        'cooking_input': cooking_sum,
        'complexity_input': complexity_features
    }

Target scores after preferences: 8.96 - 16.17
Preparing features for model training...


In [5]:

# Prepare data for training
# One feature dict per synthetic meal plan
X_features = list(map(get_plan_features, meal_plans))

# IMPROVEMENT 3: Implement a More Sophisticated Model Architecture
print("Building improved model architecture...")

# Define feature dimensions (widths of the five model inputs)
embedding_dim, complexity_dim = 50, 2
tfidf_dim = ingredient_features.shape[1]
nutrition_dim = cooking_dim = 4

def build_improved_model():
    """Build and compile the five-input regression network for meal-plan scores.

    Each feature group (embedding, TF-IDF, nutrition, cooking, complexity)
    has its own named input and dense branch. The branches are concatenated
    and fed through a shared head that ends in a single linear unit.

    Returns:
        A compiled Keras model using Adam (learning rate 0.001), MSE loss and
        an MAE metric.
    """
    def l2_penalty():
        # A fresh L2 (0.001) kernel regularizer for each regularized layer.
        return keras.regularizers.l2(0.001)

    # Input layers, in the order they are handed to the model
    embedding_input = layers.Input(shape=(embedding_dim,), name='embedding_input')
    tfidf_input = layers.Input(shape=(tfidf_dim,), name='tfidf_input')
    nutrition_input = layers.Input(shape=(nutrition_dim,), name='nutrition_input')
    cooking_input = layers.Input(shape=(cooking_dim,), name='cooking_input')
    complexity_input = layers.Input(shape=(complexity_dim,), name='complexity_input')
    model_inputs = [embedding_input, tfidf_input, nutrition_input, cooking_input, complexity_input]

    # Embedding branch: regularized 64 -> dropout -> 32
    embedding_branch = layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty())(embedding_input)
    embedding_branch = layers.Dropout(0.3)(embedding_branch)
    embedding_branch = layers.Dense(32, activation='relu')(embedding_branch)

    # TF-IDF branch: regularized 32 -> dropout
    tfidf_branch = layers.Dense(32, activation='relu', kernel_regularizer=l2_penalty())(tfidf_input)
    tfidf_branch = layers.Dropout(0.3)(tfidf_branch)

    # Small single-layer branches for the low-dimensional feature groups
    nutrition_branch = layers.Dense(16, activation='relu')(nutrition_input)
    cooking_branch = layers.Dense(8, activation='relu')(cooking_input)
    complexity_branch = layers.Dense(4, activation='relu')(complexity_input)

    # Combine all branches
    merged = layers.Concatenate()(
        [embedding_branch, tfidf_branch, nutrition_branch, cooking_branch, complexity_branch]
    )

    # Shared prediction head
    head = layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty())(merged)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(0.3)(head)
    head = layers.Dense(32, activation='relu')(head)
    score = layers.Dense(1)(head)

    model = models.Model(inputs=model_inputs, outputs=score)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae'],
    )

    return model

# IMPROVEMENT 5: Add Regularization and Early Stopping
print("Setting up callbacks for training...")
# Stop after 10 epochs without val_loss improvement and restore the best weights
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, never below 1e-4
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# IMPROVEMENT 7: Implement K-fold Cross-Validation
print("Implementing K-fold cross-validation...")
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Prepare inputs for training
def prepare_model_inputs(features_list):
    """Stack per-plan feature dicts into one array per model input.

    Args:
        features_list: Sequence of dicts, each holding the keys
            'embedding_input', 'tfidf_input', 'nutrition_input',
            'cooking_input' and 'complexity_input'.

    Returns:
        Dict with the same five keys, each mapped to an array built from the
        corresponding values of every dict, in order.
    """
    input_names = (
        'embedding_input',
        'tfidf_input',
        'nutrition_input',
        'cooking_input',
        'complexity_input',
    )
    return {
        name: np.array([features[name] for features in features_list])
        for name in input_names
    }

# IMPROVEMENT 9: Implement Evaluation Metrics Specific to Recommendation Systems
def precision_at_k(y_true, y_pred, k=5):
    """Fraction of the k highest-predicted items that are truly relevant.

    An item is relevant when its true value is strictly above the 70th
    percentile of `y_true`.

    Args:
        y_true: True scores, indexable by position.
        y_pred: Predicted scores, aligned with `y_true`.
        k: Number of top predictions to inspect (default 5).

    Returns:
        Number of relevant items among the top-k predictions, divided by k.
    """
    # Positions of the k largest predictions, best first
    ranked = np.argsort(y_pred)
    top_k = ranked[::-1][:k]

    # Consider top 30% of the true values as relevant
    cutoff = np.percentile(y_true, 70)

    hits = 0
    for position in top_k:
        if y_true[position] > cutoff:
            hits += 1
    return hits / k


Building improved model architecture...
Setting up callbacks for training...
Implementing K-fold cross-validation...


In [6]:

# Run k-fold cross-validation
fold_scores = []      # validation MAE per fold
fold_precision = []   # validation Precision@5 per fold

# Read the fold count from the splitter so the progress message stays correct
# if `n_splits` is changed where `kf` is created.
n_folds = kf.get_n_splits()

print("Starting cross-validation training...")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_features)):
    print(f"\nTraining fold {fold+1}/{n_folds}...")
    
    # Split data (X_features is a list of dicts, so index it element-wise)
    X_train = [X_features[i] for i in train_idx]
    X_val = [X_features[i] for i in val_idx]
    y_train = y_with_preferences[train_idx]
    y_val = y_with_preferences[val_idx]
    
    # Prepare inputs
    train_inputs = prepare_model_inputs(X_train)
    val_inputs = prepare_model_inputs(X_val)
    
    # Build a fresh model for every fold so no weights carry over
    model = build_improved_model()
    
    history = model.fit(
        train_inputs, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(val_inputs, y_val),
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    
    # Evaluate model: evaluate() returns [loss, mae] for this compile setup
    val_score = model.evaluate(val_inputs, y_val)[1]  # Get MAE
    fold_scores.append(val_score)
    
    # Calculate precision@5
    y_pred = model.predict(val_inputs).flatten()
    p_at_5 = precision_at_k(y_val, y_pred, k=5)
    fold_precision.append(p_at_5)
    
    print(f"Fold {fold+1} - MAE: {val_score:.4f}, Precision@5: {p_at_5:.4f}")

print(f"\nCross-validation MAE scores: {fold_scores}")
print(f"Average MAE: {np.mean(fold_scores):.4f}")
print(f"Cross-validation Precision@5 scores: {fold_precision}")
print(f"Average Precision@5: {np.mean(fold_precision):.4f}")

# Train final model on all data
print("\nTraining final model on all data...")
all_inputs = prepare_model_inputs(X_features)
final_model = build_improved_model()

# Same training settings as the cross-validation folds, except that Keras
# holds out 20% of the samples for validation via `validation_split`.
final_fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)
final_history = final_model.fit(all_inputs, y_with_preferences, **final_fit_options)

# Generate recommendations
print("\nGenerating optimized meal plan recommendations...")
# Generate 200 random candidate meal plans and their model inputs
candidate_plans = [create_meal_plan() for _ in range(200)]
candidate_features = list(map(get_plan_features, candidate_plans))
candidate_inputs = prepare_model_inputs(candidate_features)

# Predict scores
predicted_scores = final_model.predict(candidate_inputs).flatten()

# Recommend top N plans: the five highest scores, best first
top_indices = np.argsort(predicted_scores)[-5:][::-1]

# View best meal plans
print("\nTop 5 Recommended Meal Plans:")
for i, idx in enumerate(top_indices):
    plan = candidate_plans[idx]
    recipes = df.loc[plan, 'title'].values

    # Nutritional profile: per-group keyword counts summed over the plan
    nutrition = np.sum([df.loc[recipe_idx, 'nutritional_features'] for recipe_idx in plan], axis=0)
    protein_total, carb_total, veg_total, dairy_total = nutrition

    print(f"\nPlan {i+1} - Predicted Score: {predicted_scores[idx]:.2f}")
    print(f"Recipes: {recipes}")
    print(f"Nutritional Profile: Protein: {protein_total}, Carbs: {carb_total}, Vegetables: {veg_total}, Dairy: {dairy_total}")

# IMPROVEMENT 8: Collaborative Filtering Elements (Simulated)
print("\nSimulating collaborative filtering elements...")
# In a real system, you would have user ratings for meal plans
# Here we'll simulate user ratings based on our existing data

# Create a few simulated users with different preferences
user_profiles = [
    dict(
        id=1,
        vegetarian=False,
        favorite_ingredients=['chicken', 'garlic', 'cheese'],
        disliked_ingredients=['mushroom', 'olive'],
    ),
    dict(
        id=2,
        vegetarian=True,
        favorite_ingredients=['tofu', 'spinach', 'broccoli'],
        disliked_ingredients=['cheese', 'cream'],
    ),
    dict(
        id=3,
        vegetarian=False,
        favorite_ingredients=['beef', 'potato', 'onion'],
        disliked_ingredients=['cilantro', 'spicy'],
    ),
]


Starting cross-validation training...

Training fold 1/5...
Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 182.6813 - mae: 13.4493 - val_loss: 182.0894 - val_mae: 13.4414 - learning_rate: 0.0010
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 136.9191 - mae: 11.5484 - val_loss: 143.8265 - val_mae: 11.8619 - learning_rate: 0.0010
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 85.7674 - mae: 8.8459 - val_loss: 99.0111 - val_mae: 9.6901 - learning_rate: 0.0010
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 44.9811 - mae: 5.9466 - val_loss: 63.5885 - val_mae: 7.5443 - learning_rate: 0.0010
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.9369 - mae: 4.0130 - val_loss: 43.5888 - val_mae: 6.3257 - learning_rate: 0.0010
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [7]:

# Function to simulate a user's rating for a meal plan
def simulate_user_rating(user, plan):
    """Simulate the rating a user profile would give a meal plan.

    Starts from ``calculate_meal_balance_score(plan)`` and, for every recipe
    in the plan, subtracts 2.0 if the user is vegetarian and a meat keyword
    appears, adds 0.3 per favorite ingredient present and subtracts 0.5 per
    disliked ingredient present (substring matching on lower-cased text).

    Args:
        user: Profile dict with keys 'vegetarian', 'favorite_ingredients'
            and 'disliked_ingredients'.
        plan: Labels of the plan's recipes in ``df``.

    Returns:
        The adjusted score clamped to the range 1..10.
    """
    meat_keywords = ('chicken', 'beef', 'pork', 'fish', 'meat', 'turkey')
    rating = calculate_meal_balance_score(plan)

    # Adjustments are applied recipe by recipe, in plan order.
    for recipe_idx in plan:
        recipe_text = ' '.join(df.loc[recipe_idx, 'ingredients']).lower()

        # Vegetarian penalty (at most once per recipe)
        if user['vegetarian'] and any(meat in recipe_text for meat in meat_keywords):
            rating -= 2.0

        # Bonus for each favorite ingredient found
        for liked in user['favorite_ingredients']:
            if liked in recipe_text:
                rating += 0.3

        # Penalty for each disliked ingredient found
        for disliked in user['disliked_ingredients']:
            if disliked in recipe_text:
                rating -= 0.5

    # Clamp to a 1-10 scale
    capped = min(10, rating)
    return max(1, capped)

# Generate simulated ratings data
print("Generating simulated user ratings...")
ratings_data = []
for user in user_profiles:
    # Each user rates 50 random meal plans
    for _ in range(50):
        plan = create_meal_plan()
        rating = simulate_user_rating(user, plan)

        # Average of the plan's recipe embeddings
        recipe_embeddings = [df.loc[idx, 'embedding'] for idx in plan]
        plan_embedding = np.mean(recipe_embeddings, axis=0)

        # One record per (user, plan) pair
        record = dict(user_id=user['id'], plan_embedding=plan_embedding, rating=rating)
        ratings_data.append(record)

# Define a hybrid model with collaborative filtering
def build_hybrid_model(embedding_dim=50, num_users=3):
    """Build and compile a hybrid content + user-embedding rating model.

    Args:
        embedding_dim: Width of the plan-embedding input (default 50).
        num_users: Number of users; the user embedding table has
            ``num_users + 1`` rows so integer ids 0..num_users are valid.

    Returns:
        A compiled Keras model with inputs ``[plan_embedding, user_id]`` and
        a single linear output (Adam optimizer, MSE loss, MAE metric).
    """
    # Content-based part (recipe embeddings)
    recipe_input = layers.Input(shape=(embedding_dim,))
    recipe_features = layers.Dense(32, activation='relu')(recipe_input)
    
    # Collaborative filtering part: one learned 32-d vector per user id.
    # `input_length` is no longer passed: it is deprecated in Keras 3 and
    # the sequence length is already fixed by the (1,) input shape.
    user_input = layers.Input(shape=(1,))
    user_embedding = layers.Embedding(num_users + 1, 32)(user_input)
    user_embedding = layers.Flatten()(user_embedding)
    
    # Combine both approaches
    concatenated = layers.Concatenate()([recipe_features, user_embedding])
    x = layers.Dense(32, activation='relu')(concatenated)
    x = layers.Dropout(0.3)(x)
    output = layers.Dense(1)(x)
    
    model = models.Model(inputs=[recipe_input, user_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    
    return model

# Prepare data for hybrid model
def _ratings_column(key):
    # Collect one field from every simulated rating record into an array.
    return np.array([record[key] for record in ratings_data])

X_recipe = _ratings_column('plan_embedding')
X_user = _ratings_column('user_id')
y_rating = _ratings_column('rating')

# Train hybrid model
print("Training hybrid recommendation model...")
hybrid_model = build_hybrid_model()
hybrid_fit_options = dict(epochs=20, batch_size=8, validation_split=0.2, verbose=1)
hybrid_history = hybrid_model.fit([X_recipe, X_user], y_rating, **hybrid_fit_options)

# Generate personalized recommendations for a specific user
def generate_personalized_recommendations(user_id, num_recommendations=5):
    """Recommend the best of 100 random meal plans for one user.

    Args:
        user_id: Id fed to the hybrid model's user input.
        num_recommendations: How many plans to return (default 5).

    Returns:
        Tuple ``(plans, ratings)``: the selected plans, best first, and the
        matching predicted ratings as an array.
    """
    def mean_embedding(plan):
        # Average of the plan's recipe embeddings.
        return np.mean([df.loc[idx, 'embedding'] for idx in plan], axis=0)

    # Generate candidate plans
    pool = [create_meal_plan() for _ in range(100)]

    # Prepare input for prediction: one embedding row and one user id per plan
    pool_embeddings = np.array(list(map(mean_embedding, pool)))
    repeated_user = np.full(len(pool), user_id)

    # Predict ratings
    scores = hybrid_model.predict([pool_embeddings, repeated_user]).flatten()

    # Get top recommendations, highest predicted rating first
    ascending = np.argsort(scores)
    best = ascending[-num_recommendations:][::-1]

    return [pool[position] for position in best], scores[best]

# Generate personalized recommendations for each user
print("\nGenerating personalized recommendations for each user profile:")
for user in user_profiles:
    user_id = user['id']
    recommended_plans, predicted_ratings = generate_personalized_recommendations(user_id)

    print(f"\nPersonalized Recommendations for User {user_id}")

    # One-line summary of the profile the recommendations were made for
    diet_label = 'Vegetarian' if user['vegetarian'] else 'Non-vegetarian'
    likes = ', '.join(user['favorite_ingredients'])
    dislikes = ', '.join(user['disliked_ingredients'])
    print(f"User profile: {diet_label}, Likes: {likes}, Dislikes: {dislikes}")

    # Plans are already ordered best first
    for rank, (plan, rating) in enumerate(zip(recommended_plans, predicted_ratings), start=1):
        recipes = df.loc[plan, 'title'].values
        print(f"  Plan {rank} - Predicted Rating: {rating:.2f}")
        print(f"  Recipes: {recipes}")

print("\nModel training and evaluation complete.")

Generating simulated user ratings...
Training hybrid recommendation model...
Epoch 1/20




[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 81.5813 - mae: 8.8909 - val_loss: 67.9858 - val_mae: 8.1690
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 56.6650 - mae: 7.3700 - val_loss: 45.0156 - val_mae: 6.6104
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 32.1592 - mae: 5.3852 - val_loss: 22.2923 - val_mae: 4.5518
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15.2798 - mae: 3.5503 - val_loss: 6.7192 - val_mae: 2.3084
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.0279 - mae: 2.0787 - val_loss: 3.3637 - val_mae: 1.4933
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.5820 - mae: 1.8326 - val_loss: 3.6926 - val_mae: 1.6143
Epoch 7/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 7.2728 - mae