In [None]:
# Import libraries
import sys
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
warnings.filterwarnings('ignore')

# Add src to path so the project modules below can be imported.
# Guarded so that re-running this cell does not append duplicate entries.
if '../src' not in sys.path:
    sys.path.append('../src')

# Import custom modules
from data_utils import create_binary_target, filter_sparse_users_recipes
from features import (
    compute_user_features, compute_recipe_features, 
    create_modeling_dataset, get_feature_columns,
    split_temporal_per_user, prepare_features_for_training
)
from models import (
    GlobalAverageBaseline, RecipeAverageBaseline, UserAverageBaseline,
    LogisticRegressionModel, tune_hyperparameters, RecipeRecommender
)
from eval_utils import (
    evaluate_classification, compare_models, plot_roc_curve,
    plot_precision_recall_curve, plot_confusion_matrix,
    plot_feature_importance, evaluate_recommender, create_evaluation_report
)

# Plot defaults: seaborn 'whitegrid' style and a (10, 6) default figure size
sns.set_style('whitegrid')
plt.rc('figure', figsize=(10, 6))

print("Libraries imported successfully!")

## 1. Load Cleaned Data

In [None]:
# Read the cleaned CSVs (described as coming from the EDA notebook) and
# parse each table's date column right after it is loaded.
recipes = pd.read_csv('../datasets/recipes_clean.csv')
recipes['submitted'] = pd.to_datetime(recipes['submitted'])

interactions = pd.read_csv('../datasets/interactions_clean.csv')
interactions['date'] = pd.to_datetime(interactions['date'])

print(
    f"Loaded {len(recipes)} recipes",
    f"Loaded {len(interactions)} interactions",
    sep="\n",
)

## 2. Define Binary Target and Filter Sparse Entities

In [None]:
# Add the binary 'is_like' label. The helper is called with rating_threshold=4
# (described in the original note as rating >= 4).
interactions = create_binary_target(interactions, rating_threshold=4)

like_counts = interactions['is_like'].value_counts()
like_rate = interactions['is_like'].mean()

print("Binary target 'is_like' created:")
print(like_counts)
print(f"\nLike rate: {like_rate:.3f}")

In [None]:
# Apply the sparse-entity filter (a minimum of 5 interactions is passed for
# both users and recipes) and report dataset sizes before and after.
print(
    "Before filtering:",
    f"  Users: {interactions['user_id'].nunique()}",
    f"  Recipes: {interactions['recipe_id'].nunique()}",
    f"  Interactions: {len(interactions)}",
    sep="\n",
)

interactions_filtered = filter_sparse_users_recipes(
    interactions, min_user_interactions=5, min_recipe_interactions=5
)

print(
    "\nAfter filtering:",
    f"  Users: {interactions_filtered['user_id'].nunique()}",
    f"  Recipes: {interactions_filtered['recipe_id'].nunique()}",
    f"  Interactions: {len(interactions_filtered)}",
    f"  Retained: {100 * len(interactions_filtered) / len(interactions):.1f}%",
    sep="\n",
)

## 3. Feature Engineering

In [None]:
# Compute user-level features from the filtered interactions and the recipe table.
# NOTE(review): these features are built from ALL filtered interactions, i.e.
# before the temporal train/val/test split performed later in this notebook.
# If compute_user_features aggregates ratings or likes (the Summary mentions
# user mean ratings as a top predictor), validation/test labels leak into the
# features -- TODO confirm, and consider computing them from training rows only.
user_features = compute_user_features(interactions_filtered, recipes)
print(f"Computed features for {len(user_features)} users")
display(user_features.head())

In [None]:
# Compute recipe-level features from the filtered interactions.
# NOTE(review): as with the user features, this uses ALL filtered interactions,
# before the temporal train/val/test split. If compute_recipe_features
# aggregates ratings or likes (the Summary mentions recipe mean ratings),
# held-out labels leak into the features -- TODO confirm, and consider
# computing them from training rows only.
recipe_features = compute_recipe_features(interactions_filtered)
print(f"Computed features for {len(recipe_features)} recipes")
display(recipe_features.head())

In [None]:
# Create the full modeling dataset by combining interactions, recipes and the
# precomputed user/recipe feature tables.
model_df = create_modeling_dataset(
    interactions_filtered,
    recipes,
    user_features,
    recipe_features
)

print(f"Modeling dataset created: {len(model_df)} rows")
print(f"\nColumns: {list(model_df.columns)}")

# Drop rows with missing values in the feature columns or the target.
# DataFrame.dropna raises KeyError when `subset` names a column that is not in
# the frame, so only the columns actually present are used and any absent ones
# are reported instead of crashing the cell.
# NOTE(review): this uses get_feature_columns() with its default arguments,
# while the modeling cell calls get_feature_columns(for_modeling=True) --
# confirm both return the intended column set.
required_cols = get_feature_columns() + ['is_like']
present_cols = [col for col in required_cols if col in model_df.columns]
absent_cols = [col for col in required_cols if col not in model_df.columns]
if absent_cols:
    print(f"\nWarning: columns not in modeling dataset, ignored for NaN filtering: {absent_cols}")

model_df_clean = model_df.dropna(subset=present_cols)

# Guard the percentage against an empty modeling dataset.
retained_pct = 100 * len(model_df_clean) / len(model_df) if len(model_df) else 0.0
print(f"\nAfter dropping NaN: {len(model_df_clean)} rows ({retained_pct:.1f}% retained)")

## 4. Train/Validation/Test Split (Temporal per User)

In [None]:
# Split the cleaned modeling rows into train/val/test with the per-user
# temporal splitter, requesting 60% / 20% / 20%.
train_df, val_df, test_df = split_temporal_per_user(
    model_df_clean, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2
)

# Size of each split: rows, distinct users, distinct recipes
print(
    "Data split:",
    f"  Train: {len(train_df)} rows, {train_df['user_id'].nunique()} users, {train_df['recipe_id'].nunique()} recipes",
    f"  Val:   {len(val_df)} rows, {val_df['user_id'].nunique()} users, {val_df['recipe_id'].nunique()} recipes",
    f"  Test:  {len(test_df)} rows, {test_df['user_id'].nunique()} users, {test_df['recipe_id'].nunique()} recipes",
    sep="\n",
)

# Share of positive labels in each split
print(
    "\nLike rates:",
    f"  Train: {train_df['is_like'].mean():.3f}",
    f"  Val:   {val_df['is_like'].mean():.3f}",
    f"  Test:  {test_df['is_like'].mean():.3f}",
    sep="\n",
)

## 5. Prepare Features for Modeling

In [None]:
# Requested feature list for modeling
feature_cols = get_feature_columns(for_modeling=True)
print(f"Feature columns ({len(feature_cols)}): {feature_cols}")

# Build X/y for each split; the helper also returns the list of features it
# reports as available.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    available_features,
) = prepare_features_for_training(train_df, val_df, test_df, feature_cols)

print(
    f"\nAvailable features ({len(available_features)}): {available_features}",
    f"\nTrain shape: X={X_train.shape}, y={y_train.shape}",
    f"Val shape:   X={X_val.shape}, y={y_val.shape}",
    f"Test shape:  X={X_test.shape}, y={y_test.shape}",
    sep="\n",
)

## 6. Baseline Models

In [None]:
# Baseline 1: Global Average -- fitted on the training labels only
baseline_global = GlobalAverageBaseline()
baseline_global.fit(y_train)

# Positive-class probability (column 1) and hard predictions on the test set
y_test_proba_global = baseline_global.predict_proba(X_test)[:, 1]
y_test_pred_global = baseline_global.predict(X_test)

results_global = evaluate_classification(
    y_test,
    y_test_pred_global,
    y_test_proba_global,
    "Global Average",
)

print(
    "Global Average Baseline:",
    baseline_global,
    f"  Test AUC: {results_global['roc_auc']:.4f}",
    f"  Test Accuracy: {results_global['accuracy']:.4f}",
    sep="\n",
)

In [None]:
# Baseline 2: Recipe Average -- fitted on the training frame
baseline_recipe = RecipeAverageBaseline()
baseline_recipe.fit(train_df)

# Positive-class probability per test row, thresholded at 0.5 for hard labels.
# NOTE(review): probabilities come from test_df rows while y_test comes from
# prepare_features_for_training -- assumes the two are row-aligned; confirm.
y_test_proba_recipe = baseline_recipe.predict_proba(test_df)[:, 1]
y_test_pred_recipe = (y_test_proba_recipe >= 0.5).astype(int)

results_recipe = evaluate_classification(
    y_test,
    y_test_pred_recipe,
    y_test_proba_recipe,
    "Recipe Average",
)

print(
    "Recipe Average Baseline:",
    baseline_recipe,
    f"  Test AUC: {results_recipe['roc_auc']:.4f}",
    f"  Test Accuracy: {results_recipe['accuracy']:.4f}",
    sep="\n",
)

In [None]:
# Baseline 3: User Average -- fitted on the training frame
baseline_user = UserAverageBaseline()
baseline_user.fit(train_df)

# Positive-class probability per test row, thresholded at 0.5 for hard labels.
# NOTE(review): probabilities come from test_df rows while y_test comes from
# prepare_features_for_training -- assumes the two are row-aligned; confirm.
y_test_proba_user = baseline_user.predict_proba(test_df)[:, 1]
y_test_pred_user = (y_test_proba_user >= 0.5).astype(int)

results_user = evaluate_classification(
    y_test,
    y_test_pred_user,
    y_test_proba_user,
    "User Average",
)

print(
    "User Average Baseline:",
    baseline_user,
    f"  Test AUC: {results_user['roc_auc']:.4f}",
    f"  Test Accuracy: {results_user['accuracy']:.4f}",
    sep="\n",
)

## 7. Logistic Regression with Hyperparameter Tuning

In [None]:
# Search over the regularisation grid, passing the train and validation
# splits to the tuner and selecting on ROC AUC.
best_model, tuning_results = tune_hyperparameters(
    X_train,
    y_train,
    X_val,
    y_val,
    C_values=[0.01, 0.1, 1, 10, 100],
    metric='roc_auc',
    feature_names=available_features,
)

print("Hyperparameter tuning results:")
display(tuning_results)
print(f"\nBest model: {best_model}")

In [None]:
# Score the tuned model on each split. For every split we take the
# positive-class probability (column 1) and the hard prediction, then
# compute the classification metrics.

# Training split
y_train_proba = best_model.predict_proba(X_train)[:, 1]
y_train_pred = best_model.predict(X_train)
results_train = evaluate_classification(
    y_train, y_train_pred, y_train_proba, "Logistic Regression"
)

# Validation split
y_val_proba = best_model.predict_proba(X_val)[:, 1]
y_val_pred = best_model.predict(X_val)
results_val = evaluate_classification(
    y_val, y_val_pred, y_val_proba, "Logistic Regression"
)

# Test split
y_test_proba_lr = best_model.predict_proba(X_test)[:, 1]
y_test_pred_lr = best_model.predict(X_test)
results_test = evaluate_classification(
    y_test, y_test_pred_lr, y_test_proba_lr, "Logistic Regression"
)

print("Logistic Regression Performance:", "="*60, sep="\n")
report = create_evaluation_report(results_train, results_val, results_test)
display(report)

## 8. Compare All Models

In [None]:
# Gather the test-set results of the three baselines and the logistic
# regression, then build the comparison table on ROC AUC.
all_results = [results_global, results_recipe, results_user, results_test]
comparison = compare_models(all_results, metric='roc_auc')

print("Model Comparison (Test Set):", "="*80, sep="\n")
display(comparison)

## 9. Visualize Model Performance

In [None]:
# ROC curves for the three baselines and the logistic regression, drawn on
# one shared set of axes using the test-set probabilities.
fig, ax = plt.subplots(figsize=(10, 8))

plot_roc_curve(y_test, y_test_proba_global, "Global Average", ax)
plot_roc_curve(y_test, y_test_proba_recipe, "Recipe Average", ax)
plot_roc_curve(y_test, y_test_proba_user, "User Average", ax)
plot_roc_curve(y_test, y_test_proba_lr, "Logistic Regression", ax)

plt.tight_layout()

# savefig does not create missing parent directories, so make sure the output
# folder exists (e.g. on a fresh checkout) before writing the figure.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'roc_curve_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Precision-recall curve for the logistic regression on the test set
fig, ax = plt.subplots(figsize=(10, 8))

plot_precision_recall_curve(y_test, y_test_proba_lr, "Logistic Regression", ax)

plt.tight_layout()

# savefig does not create missing parent directories, so make sure the output
# folder exists (e.g. on a fresh checkout) before writing the figure.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'precision_recall_curve.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix of the logistic regression's hard predictions on the test set
fig, ax = plt.subplots(figsize=(8, 6))
plot_confusion_matrix(y_test, y_test_pred_lr, ax=ax)
plt.tight_layout()

# savefig does not create missing parent directories, so make sure the output
# folder exists (e.g. on a fresh checkout) before writing the figure.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Feature importance: coefficient table from the tuned model, shown as a
# table (first 10 rows) and as a plot (top 20).
coefficients_df = best_model.get_coefficients()

print("Top 10 Most Important Features:")
display(coefficients_df.head(10))

fig, ax = plt.subplots(figsize=(10, 8))
plot_feature_importance(coefficients_df, top_n=20, ax=ax)
plt.tight_layout()

# savefig does not create missing parent directories, so make sure the output
# folder exists (e.g. on a fresh checkout) before writing the figure.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

## 10. Build Recipe Recommender

In [None]:
# Create recommender: wraps the tuned model together with the list of feature
# names that prepare_features_for_training reported as available.
recommender = RecipeRecommender(best_model, available_features)
print(f"Recommender created with {len(available_features)} features")

In [None]:
# Create the candidate set for a subset of test users.
# For each user we consider popular recipes they have not rated in train/val.

# Sizes that control the cost of candidate generation
N_EVAL_USERS = 100            # number of test users to generate candidates for
N_POPULAR_RECIPES = 500       # size of the popular-recipe pool
N_CANDIDATES_PER_USER = 50    # cap on candidates kept per user

# NOTE: this takes the first N_EVAL_USERS distinct user ids in test_df order.
# It is a deterministic head slice for efficiency, not a random sample.
test_users = test_df['user_id'].unique()[:N_EVAL_USERS]

print(f"Creating candidates for {len(test_users)} test users...")

# Recipes each user has already rated in train/val (excluded from candidates)
train_val_df = pd.concat([train_df, val_df])
user_rated_recipes = train_val_df.groupby('user_id')['recipe_id'].apply(set).to_dict()

# Pool of recipes with the largest 'recipe_num_ratings' values
popular_recipes = recipe_features.nlargest(N_POPULAR_RECIPES, 'recipe_num_ratings')['recipe_id'].values

candidate_rows = []
for user_id in test_users:
    rated = user_rated_recipes.get(user_id, set())
    # Keep pool order and cap at N_CANDIDATES_PER_USER unrated recipes
    unseen = [r for r in popular_recipes if r not in rated][:N_CANDIDATES_PER_USER]
    candidate_rows.extend(
        {'user_id': user_id, 'recipe_id': recipe_id} for recipe_id in unseen
    )

# Name the columns explicitly so an empty candidate list still yields a frame
# with 'user_id' and 'recipe_id' columns instead of a column-less DataFrame.
candidates_df = pd.DataFrame(candidate_rows, columns=['user_id', 'recipe_id'])
print(f"Created {len(candidates_df)} candidate pairs")

# Add features to candidates and drop pairs missing any model feature
candidates_full = create_modeling_dataset(candidates_df, recipes, user_features, recipe_features)
candidates_full = candidates_full.dropna(subset=available_features)

print(f"Candidates with features: {len(candidates_full)} pairs")

In [None]:
# Produce top-10 recommendations for every sampled test user, with the
# health weight set to 0.0 (no health adjustment).
recommendations = recommender.recommend_batch(
    test_users, candidates_full, top_k=10, health_weight=0.0
)

print(f"Generated recommendations for {len(recommendations)} users")

# Show the first sampled user's list as an example, if that user received one
example_user = test_users[0]
print(f"\nExample recommendations for user {example_user}:")
if example_user in recommendations:
    display(recommendations[example_user])

## 11. Evaluate Recommender System

In [None]:
# Evaluate recommender using Precision@K and Recall@K.
# Recommendations were generated only for the users in `test_users`, so the
# ground truth is restricted to those users. This keeps the evaluation
# population equal to the recommendation population: test users outside the
# subset cannot be counted as misses. (Whether evaluate_recommender already
# skips users without recommendations is not visible from this notebook.)
eval_test_df = test_df[test_df['user_id'].isin(test_users)]

rec_metrics = evaluate_recommender(
    eval_test_df,
    recommendations,
    k_values=[5, 10]
)

print("Recommender Evaluation Metrics:")
print("="*40)
for metric, value in rec_metrics.items():
    print(f"{metric}: {value:.4f}")

## 12. Save Results

In [None]:
# Persist the evaluation artefacts as CSV files under ../reports.
# to_csv does not create missing parent directories, so make sure the output
# folder exists (e.g. on a fresh checkout) before writing.
reports_dir = Path('../reports')
reports_dir.mkdir(parents=True, exist_ok=True)

# Save model evaluation results
report.to_csv(reports_dir / 'model_evaluation.csv', index=False)
comparison.to_csv(reports_dir / 'model_comparison.csv', index=False)

# Save feature importance
coefficients_df.to_csv(reports_dir / 'feature_importance.csv', index=False)

# Save recommender metrics (single-row table, one column per metric)
pd.DataFrame([rec_metrics]).to_csv(reports_dir / 'recommender_metrics.csv', index=False)

print("Results saved to reports/ directory")

## Summary

**Key Findings:**

1. **Baseline Performance**: All baselines provide reasonable predictions, with the recipe-average and user-average baselines outperforming the global-average baseline

2. **Logistic Regression**: Achieves best performance by combining user, recipe, and nutrition features

3. **Important Features**: Top predictors include user/recipe mean ratings, nutrition metrics, and healthiness indicators

4. **Recommender System**: Successfully generates personalized recommendations with measurable precision and recall

**Next Steps:**
- Analyze health bias in recommendations
- Explore trade-offs between recommendation quality and healthiness