# Creative Effectiveness Model Evaluation & Visualization

This notebook demonstrates the training and evaluation of the multi-modal pipeline, including:
1. Model training with enhanced features.
2. Performance metrics and plots.
3. Feature importance with SHAP.
4. Model persistence.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from models.facade import CreativeEffectivenessFacade
from features.tabular_processor import calculate_ctr

# Locations of the input data, the image directory, and the saved model file.
DATA_PATH = "data/data.csv"
IMAGES_ROOT = "images"
MODEL_SAVE_PATH = "model_v1.pkl"

# Seed NumPy's global RNG so repeated runs draw the same random numbers.
np.random.seed(42)

## 1. Data Preparation
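For demonstration we draw a reproducible random sample of at most 50 rows from the dataset, split it 80/20 into training and test sets, and write the training subset to a CSV file for the training step.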

In [None]:
# Sizing knobs for the demonstration subset.
MAX_DEMO_ROWS = 50      # upper bound on rows drawn from the full dataset
TRAIN_FRACTION = 0.8    # share of the sampled rows used for training
SPLIT_SEED = 42         # fixes which rows are sampled and their order

# Keep the raw frame under its own name so the sampled frame can be
# rebuilt without re-reading the file.
df_raw = pd.read_csv(DATA_PATH)

# Sampling without replacement both caps the row count and shuffles the rows;
# the fixed random_state makes the selection repeatable.
df = df_raw.sample(n=min(MAX_DEMO_ROWS, len(df_raw)), random_state=SPLIT_SEED).reset_index(drop=True)

# The sample is already shuffled, so a positional cut yields a random split.
train_size = int(TRAIN_FRACTION * len(df))
df_train = df.iloc[:train_size].copy()
df_test = df.iloc[train_size:].copy()

# Fail here with a clear message instead of deep inside training or metrics.
if df_train.empty or df_test.empty:
    raise ValueError(
        f"Train/test split produced an empty partition "
        f"({len(df_train)} train, {len(df_test)} test rows); the dataset is too small."
    )
# The two partitions must cover the sampled frame exactly once.
assert len(df_train) + len(df_test) == len(df)

# The training step reads its data from a CSV file, so persist the subset.
train_path = "data/train_subset.csv"
df_train.to_csv(train_path, index=False)

print(f"Training on {len(df_train)} samples, testing on {len(df_test)} samples.")

## 2. Model Training

In [None]:
# Build the facade on CPU, pointing it at the image directory, then train it
# from the training-subset CSV written during data preparation.
facade = CreativeEffectivenessFacade(
    images_root=IMAGES_ROOT,
    device="cpu",
)
facade.train_from_csv(train_path)

## 3. Evaluation & Plotting

In [None]:
# Assemble the model inputs for the test rows.
image_paths = facade._resolve_image_paths(df_test["creative_id"].tolist())
texts = facade._get_texts(df_test)

# Coerce targets and predictions to flat float arrays. If predict ever returned
# a column of shape (n, 1), subtracting it from an (n,) target would broadcast
# to an (n, n) matrix and silently corrupt every metric below.
y_true = np.asarray(calculate_ctr(df_test), dtype=float).ravel()
y_pred = np.asarray(facade.model.predict(df_test, image_paths, texts), dtype=float).ravel()
if y_true.shape != y_pred.shape:
    raise ValueError(
        f"Target/prediction length mismatch: {y_true.shape[0]} targets vs {y_pred.shape[0]} predictions."
    )

# Sums of squares shared by both metrics.
squared_errors = (y_true - y_pred) ** 2
residual_ss = np.sum(squared_errors)
total_ss = np.sum((y_true - np.mean(y_true)) ** 2)

rmse = np.sqrt(np.mean(squared_errors))
# R^2 is undefined when the targets have zero variance; report NaN rather than
# dividing by zero.
r2 = 1 - residual_ss / total_ss if total_ss > 0 else float("nan")

print(f"RMSE: {rmse:.6f}")
print(f"R^2: {r2:.6f}")

# Scatter of actual vs predicted CTR with a fitted regression line.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(x=y_true, y=y_pred, scatter_kws={'alpha': 0.5}, ax=ax)
ax.set_xlabel("Actual CTR")
ax.set_ylabel("Predicted CTR")
ax.set_title("Actual vs Predicted CTR")
plt.show()

In [None]:
# Signed prediction errors: actual minus predicted.
residuals = y_true - y_pred

# Histogram of the errors with a kernel-density overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(residuals, kde=True, ax=ax)
ax.set_xlabel("Residual")
ax.set_title("Residual Distribution")
plt.show()

## 4. Feature Importance with SHAP

In [None]:
# Features for the test rows, produced by the model's own extraction step
# (presumably the same representation the regressor was trained on — confirm).
X_test = facade.model._extract_all_features(df_test, image_paths, texts)

# Wrap the model's regressor in a SHAP explainer and compute attributions
# for every test row.
explainer = shap.Explainer(facade.model.regressor)
shap_values = explainer(X_test)

# Set the title first: summary_plot renders (and by default shows) the figure
# itself, so a title set afterwards would land on a new, empty figure.
plt.title("SHAP Feature Importance Summary")
shap.summary_plot(
    shap_values,
    X_test,
    feature_names=facade.model.feature_names,
    max_display=15,
)

## 5. Model Saving

In [None]:
# Save the model through the facade to MODEL_SAVE_PATH, then echo the
# destination so the output records where the file was written.
facade.save_model(MODEL_SAVE_PATH)
print(f"Model saved to {MODEL_SAVE_PATH}")