# 02. Train Models and Simulate

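Generate an exposure log with a popularity logging policy, train baseline CTR rankers on it, evaluate them offline, then simulate several ad-selection policies and compare the results.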

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, os.path.abspath('..'))

from src import exposure_log, models, bandits, simulation, evaluation, utils

# Single seed shared by sampling, log generation, and every simulation below.
SEED = 42
# NOTE(review): set_random_seed is a project helper; presumably it seeds the
# global RNGs and returns a generator object — confirm in src/utils.
rng = utils.set_random_seed(SEED)

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'.
# Use whichever name this installation provides so the setup cell does not
# raise on older Matplotlib versions; fall back to the default style otherwise.
_STYLE_CANDIDATES = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
plot_style = next(
    (name for name in _STYLE_CANDIDATES if name in plt.style.available),
    'default',
)
plt.style.use(plot_style)
print('Setup complete!')

## 1. Load Processed Data

In [None]:
# Read the three processed CSV tables in one pass; the tuple order matches
# the order of the file paths below.
guests_df, ads_df, guest_ad_prefs = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/unified_guests.csv',
        '../data/processed/advertisers.csv',
        '../data/processed/guest_ad_preferences.csv',
    )
)

print(f'Loaded {len(guests_df)} guests, {len(ads_df)} ads, {len(guest_ad_prefs)} preferences')

# CSV stores dates as text; convert the arrival column to datetime64.
arrival_dates = pd.to_datetime(guests_df['arrival_date'])
guests_df['arrival_date'] = arrival_dates

## 2. Generate Exposure Log (Logging Policy)

In [None]:
# Seeded sample of at most 500 guests (fewer if the table is smaller).
n_log_guests = min(500, len(guests_df))
sample_guests = guests_df.sample(n_log_guests, random_state=SEED)

# Keyword settings forwarded unchanged to the project's log generator.
log_settings = dict(
    n_sessions_per_stay=4,
    logging_policy='popularity',
    k_ads_per_session=3,
    seed=SEED,
)
exp_log = exposure_log.generate_exposure_log(
    sample_guests, ads_df, guest_ad_prefs, **log_settings
)

print(f'Generated {len(exp_log)} exposures')
print(f'CTR: {exp_log["click"].mean():.3f}')
# Last expression of the cell: rich preview of the first rows.
exp_log.head()

## 3. Train Baseline Models

In [None]:
# Popularity baseline: fitted on the exposure log only.
pop_ranker = models.PopularityRanker()
pop_ranker.fit(exp_log)
print('Trained PopularityRanker')

# Logistic regression: fitted on the log plus guest and ad tables.
lr_ranker = models.LogisticRegressionRanker()
lr_ranker.fit(exp_log, guests_df, ads_df)
print('Trained LogisticRegressionRanker')

# Gradient boosting (optional). The model is built and fitted under a
# temporary name and only bound to `gbm_ranker` once fitting succeeded, so a
# failed fit can never leave an unfitted ranker behind for later cells.
try:
    gbm_candidate = models.GradientBoostingRanker(use_xgboost=True, n_estimators=50)
    gbm_candidate.fit(exp_log, guests_df, ads_df)
except Exception as exc:
    # `Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit
    # propagate, so the cell can still be interrupted.
    # Drop any `gbm_ranker` left over from an earlier run of this cell so the
    # evaluation cell's `'gbm_ranker' in locals()` check stays truthful.
    globals().pop('gbm_ranker', None)
    print('GBM training failed (XGBoost/LightGBM may not be installed)')
    # Surface the real cause instead of hiding it behind the generic message.
    print(f'  Reason: {type(exc).__name__}: {exc}')
else:
    gbm_ranker = gbm_candidate
    print('Trained GradientBoostingRanker')

## 4. Evaluate Models (Offline Ranking)

In [None]:
# Rankers to compare, keyed by display name. The gradient-boosting entry is
# included only when a `gbm_ranker` variable exists in this namespace.
model_dict = {
    'Popularity': pop_ranker,
    'LogisticRegression': lr_ranker,
    **({'GradientBoosting': gbm_ranker} if 'gbm_ranker' in locals() else {}),
}

# Positional inputs shared by the comparison call; k=3 is the ranking cutoff
# passed to the project's evaluation helper.
eval_inputs = (model_dict, exp_log, guests_df, ads_df, guest_ad_prefs)
ranking_results = evaluation.compare_models(*eval_inputs, k=3)

print('\nRanking Evaluation Results:')
print(ranking_results)

## 5. Run Simulations with Different Policies

In [None]:
# Create simple policy wrappers
class PopularityPolicy:
    """Policy wrapper that delegates selection to ``exposure_log.popularity_policy``."""

    def __init__(self, ads_df):
        # Kept on the instance for reference; select_ads does not read it.
        self.ads_df = ads_df

    def select_ads(self, guest, candidates, k=3):
        """Return the helper's selection for ``candidates`` and ``k``.

        ``guest`` is accepted for interface compatibility but is not used;
        an empty string is passed as the helper's first positional argument.
        """
        chosen = exposure_log.popularity_policy('', candidates, k)
        return chosen

class RandomPolicy:
    """Policy wrapper that delegates selection to ``exposure_log.random_policy``."""

    def select_ads(self, guest, candidates, k=3):
        """Return the helper's selection for ``candidates`` and ``k``.

        ``guest`` is accepted for interface compatibility but is not used;
        an empty string is passed as the helper's first positional argument.
        """
        # NOTE(review): the same SEED is passed on every call. If the helper
        # builds a fresh RNG from it, identical candidate lists would always
        # yield the same picks — confirm against exposure_log.random_policy.
        chosen = exposure_log.random_policy('', candidates, k, seed=SEED)
        return chosen

class EpsilonGreedyPolicy:
    """Policy wrapper around ``bandits.EpsilonGreedyBandit`` (epsilon=0.1)."""

    def __init__(self):
        # One bandit per policy instance, seeded with the notebook-wide SEED.
        bandit = bandits.EpsilonGreedyBandit(epsilon=0.1, seed=SEED)
        self.bandit = bandit

    def select_ads(self, guest, candidates, k=3):
        """Forward the selection request to the wrapped bandit unchanged."""
        chosen = self.bandit.select_ads(guest, candidates, k)
        return chosen

    def update(self, ad_id, reward):
        """Forward the observed reward for ``ad_id`` to the wrapped bandit."""
        self.bandit.update(ad_id, reward)

print('Policies created')

In [None]:
# Run simulations
# Seeded sample of at most 300 guests shared by every policy run.
n_sim_guests = min(300, len(guests_df))
sim_guests = guests_df.sample(n_sim_guests, random_state=SEED)

# Display name -> policy object; insertion order is the run order.
policies = dict(
    Popularity=PopularityPolicy(ads_df),
    Random=RandomPolicy(),
    EpsilonGreedy=EpsilonGreedyPolicy(),
)

# Simulation settings held constant across policies so only the policy varies.
sim_settings = dict(n_sessions_per_stay=4, alpha=0.3, gamma=0.5, seed=SEED)

all_sim_results = []

for policy_name, policy in policies.items():
    print(f'\nRunning simulation: {policy_name}')
    sim_log, metrics = simulation.run_simulation(
        policy, sim_guests, ads_df, guest_ad_prefs,
        policy_name=policy_name,
        **sim_settings,
    )
    all_sim_results.append(metrics)
    print(f'  CTR: {metrics["ctr"]:.4f}, Revenue/stay: {metrics["revenue_per_stay"]:.2f}')

# One row per policy, built once from the collected metric records.
sim_comparison = pd.DataFrame(all_sim_results)
print('\nSimulation Results:')
print(sim_comparison)

## 6. Visualize Results

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# One entry per subplot, left to right: (metric column, title, y-axis label).
panels = [
    ('ctr', 'Click-Through Rate', 'CTR'),
    ('revenue_per_stay', 'Revenue per Stay', 'Revenue (CHF)'),
    ('guest_experience_index', 'Guest Experience Index', 'Index (higher = better)'),
]

# Each panel is a bar chart of one metric per policy, drawn on its own axes.
for ax, (metric, title, ylabel) in zip(axes, panels):
    sim_comparison.plot(x='policy', y=metric, kind='bar', ax=ax, legend=False, rot=45)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

## 7. Awareness Dynamics Analysis

In [None]:
# Test different awareness parameters
alpha_values = [0.1, 0.3, 0.5]
gamma_values = [0.3, 0.5, 0.7]

# Every (alpha, gamma) pair, with alpha varying slowest.
param_grid = [(a, g) for a in alpha_values for g in gamma_values]

awareness_results = []

for alpha, gamma in param_grid:
    run_label = f'Pop_a{alpha}_g{gamma}'
    # A fresh policy and a fresh 100-guest slice are created for every run.
    _, metrics = simulation.run_simulation(
        PopularityPolicy(ads_df), sim_guests.head(100), ads_df, guest_ad_prefs,
        n_sessions_per_stay=4, alpha=alpha, gamma=gamma, seed=SEED,
        policy_name=run_label,
    )
    # Tag the metrics record with the parameters that produced it.
    metrics['alpha'] = alpha
    metrics['gamma'] = gamma
    awareness_results.append(metrics)

awareness_df = pd.DataFrame(awareness_results)
report_columns = ['alpha', 'gamma', 'ctr', 'revenue_per_stay', 'avg_intrusion']
print('\nAwareness Parameter Sweep:')
print(awareness_df[report_columns])

## 8. Summary

This notebook demonstrated:
- Training baseline CTR models (Popularity, Logistic Regression, and, when XGBoost/LightGBM is available, Gradient Boosting)
- Generating exposure logs with logging policies
- Running simulations with awareness dynamics
- Comparing policies on CTR, revenue, and guest experience
- Analyzing impact of awareness parameters