# Chapter 17: Bayesian Optimization Code Verification

This notebook tests and verifies the Bayesian Optimization example from Chapter 17.

This notebook covers:
1. Installation of required packages (Ax)
2. The complete code example with working helper functions
3. Validation that the workflow matches the diagram in Section 4.5

## Step 1: Install Required Packages

First, let's install Ax and its dependencies.

In [None]:
# Install Ax (includes BoTorch as dependency)
!pip install ax-platform

## Step 2: Import Libraries

In [None]:
import numpy as np
import pandas as pd
# Use simplified Ax imports that work with current version
from ax.service.ax_client import AxClient, ObjectiveProperties
import warnings
warnings.filterwarnings('ignore')

print("Imports successful!")

## Step 3: Define Helper Functions

These simulate the model training and A/B testing process. In production, these would integrate with your ML pipeline and experimentation platform.

In [None]:
def train_model_with_config(params):
    """
    Simulate training a model with given hyperparameters.

    In production, this would:
    - Load training data
    - Initialize model with hyperparameters
    - Train the model
    - Return the trained model object

    For simulation, we just return a dict representing the model.

    Args:
        params: Mapping of hyperparameter name to value.

    Returns:
        Dict with two keys: "id", a label of the form "model_<n>" with
        0 <= n < 10000 derived from the hyperparameters, and "params",
        the mapping that was passed in.
    """
    import zlib  # local import keeps this notebook cell self-contained

    # The built-in hash() of a str is salted per interpreter process, so it
    # would give the same hyperparameters a different ID on every kernel
    # restart. A CRC32 over a key-sorted rendering is stable across runs and
    # independent of the order in which the keys were inserted.
    canonical = repr(sorted(params.items(), key=lambda item: str(item[0])))
    model_id = f"model_{zlib.crc32(canonical.encode('utf-8')) % 10000}"
    print(f"  [Training] {model_id} with params: {params}")
    return {"id": model_id, "params": params}


def simulate_true_objective(params):
    """
    Evaluate the synthetic "ground truth" CTR for one hyperparameter set.

    This stands in for the unknown response surface that the A/B tests
    measure. Because the function is known, the optimizer's result can be
    checked against it.

    The surface peaks near:
    - learning_rate: about 0.02 (log10 close to -1.7)
    - max_depth: 5.5 (so 5 and 6 score equally)
    - min_samples_split: 6
    - optimizer: adam

    Args:
        params: Dict with keys 'learning_rate', 'max_depth',
            'min_samples_split' and 'optimizer' (one of 'adam', 'sgd',
            'rmsprop').

    Returns:
        The simulated CTR, never lower than 0.01.
    """
    learning_rate, tree_depth, split_size, optimizer_name = (
        params[key]
        for key in ('learning_rate', 'max_depth', 'min_samples_split', 'optimizer')
    )

    # Quadratic penalties: each term is zero at its peak and negative elsewhere.
    lr_penalty = -100 * (np.log10(learning_rate) + 1.7)**2
    depth_penalty = -2 * (tree_depth - 5.5)**2
    split_penalty = -0.5 * (split_size - 6)**2

    # Additive bonus per optimizer; an unknown name raises KeyError.
    optimizer_bonuses = {'adam': 0.5, 'sgd': 0.0, 'rmsprop': 0.3}
    bonus = optimizer_bonuses[optimizer_name]

    # Start from a 4% base CTR and scale the penalties and bonus into CTR units.
    baseline = 0.04
    total_penalty = lr_penalty + depth_penalty + split_penalty
    ctr = baseline + total_penalty / 1000 + bonus / 100

    # Floor the result so the simulated CTR stays positive.
    floor = 0.01
    return ctr if ctr > floor else floor


def run_abn_test(trained_models, noise_level=0.0005):
    """
    Simulate one A/B/n experiment over a batch of model variants.

    A production version would deploy the variants to an experimentation
    platform, split traffic between them, wait for significance, and then
    read back each arm's mean and standard error. Here every arm is scored
    with the synthetic objective plus Gaussian noise instead.

    Args:
        trained_models: Iterable of (trial_index, model) pairs, where each
            model is a dict holding its hyperparameters under 'params'.
        noise_level: Standard deviation of the Gaussian noise added to the
            true CTR. It is also the lower bound of the reported standard
            error.

    Returns:
        Dict mapping trial_index to {'ctr_mean': ..., 'ctr_sem': ...}.
    """
    outcomes = {}

    for trial_index, variant in trained_models:
        # Noise-free CTR for this variant's hyperparameters.
        ground_truth = simulate_true_objective(variant['params'])

        # Draw the observation noise first and the SEM jitter second, so the
        # random stream is consumed in a fixed order for every arm.
        measured = ground_truth + np.random.normal(0, noise_level)
        std_err = noise_level + np.random.uniform(0, 0.0002)

        outcomes[trial_index] = dict(ctr_mean=measured, ctr_sem=std_err)

        print(f"  [A/B Test] Trial {trial_index}: CTR = {measured:.4f} ± {std_err:.4f}")

    return outcomes


def check_convergence(client, threshold=0.0001):
    """
    Report whether the optimization should stop early.

    This is a placeholder that never signals convergence, so the caller
    always spends its full trial budget. Neither argument is read yet.

    A real implementation could stop when:
    - the maximum Expected Improvement over the search space drops below
      `threshold`,
    - the best value has not improved for N iterations, or
    - a target metric value has been reached.

    Args:
        client: The optimization client (currently unused).
        threshold: Improvement threshold (currently unused).

    Returns:
        Always False.
    """
    converged = False
    return converged


def deploy_to_production(model):
    """
    Announce deployment of the final optimized model.

    This simulation only prints the model's ID and hyperparameters. A real
    deployment would save the model artifacts, update the serving
    infrastructure, and monitor deployment health.

    Args:
        model: Dict with an 'id' entry and a 'params' entry.
    """
    model_id = model['id']
    print(f"\n[Deployment] Model {model_id} deployed to production!")
    final_params = model['params']
    print(f"[Deployment] Final hyperparameters: {final_params}")


# Confirm that every helper definition in this cell executed without error.
print("Helper functions defined successfully!")

## Step 4: Configure Bayesian Optimization

This matches the code example from Chapter 17, Section 5.

In [None]:
# Step 4: Create the Ax client and register the experiment.
# No generation strategy is passed explicitly, so Ax chooses one itself; the
# only hint given is the number of initialization (SOBOL) trials.
ax_client = AxClient(verbose_logging=False)

# Search space: one log-scaled float, two integer ranges, one categorical.
search_space = [
    {
        "name": "learning_rate",
        "type": "range",
        "bounds": [1e-4, 1e-1],
        "value_type": "float",
        "log_scale": True,
    },
    {
        "name": "max_depth",
        "type": "range",
        "bounds": [3, 10],
        "value_type": "int",
    },
    {
        "name": "min_samples_split",
        "type": "range",
        "bounds": [2, 20],
        "value_type": "int",
    },
    {
        "name": "optimizer",
        "type": "choice",
        "values": ["adam", "sgd", "rmsprop"],
    },
]

ax_client.create_experiment(
    name="model_hyperparameter_tuning",
    parameters=search_space,
    # Single objective: CTR, to be maximized.
    objectives={"ctr": ObjectiveProperties(minimize=False)},
    # Five SOBOL initialization trials before model-based generation.
    choose_generation_strategy_kwargs={"num_initialization_trials": 5},
)

print("Ax client configured successfully!")
print("Generation Strategy: SOBOL (5 trials) → GP-based BO")

## Step 5: Run Bayesian Optimization Loop

This executes the workflow from the diagram in Section 4.5:
1. InitBatch / OptimizeBatch
2. Batch Train Models
3. Create A/B/n Test & Deploy
4. Monitor & Collect Results
5. Update GP
6. Check Budget

In [None]:
# Step 5: Run the optimization loop. Each iteration generates a batch of
# configurations, trains them, tests them, and feeds the results back to Ax.
total_budget = 20  # Maximum number of trials
batch_size = 3  # Number of configurations to evaluate in parallel

banner = "=" * 80
print(banner)
print("STARTING BAYESIAN OPTIMIZATION")
print(banner)

trials_completed = 0
iteration = 0

while trials_completed < total_budget:
    iteration += 1
    print(f"\n{banner}")
    print(f"ITERATION {iteration}")
    print(banner)

    # InitBatch / OptimizeBatch: ask Ax for the next configurations. The last
    # batch is shortened so the total never exceeds the budget.
    print("\n[Step 1] Generate Batch of Configurations")
    remaining_budget = total_budget - trials_completed
    current_batch_size = min(batch_size, remaining_budget)

    trial_configs = []
    for _ in range(current_batch_size):
        parameters, trial_index = ax_client.get_next_trial()
        trial_configs.append((trial_index, parameters))
        print(f"  Trial {trial_index}: {parameters}")

    # Batch Train Models: one model per suggested configuration.
    print("\n[Step 2] Batch Train Models")
    trained_models = [
        (trial_index, train_model_with_config(params))
        for trial_index, params in trial_configs
    ]

    # Create A/B/n Test & Deploy: evaluate the whole batch in one experiment.
    print("\n[Step 3] Create A/B/n Test & Deploy All Models")
    experiment_results = run_abn_test(trained_models)

    # Monitor & Collect Results / Update GP: report each arm's mean and SEM.
    print("\n[Step 4] Update GP with Results")
    for trial_index, _model in trained_models:
        outcome = experiment_results[trial_index]
        ax_client.complete_trial(
            trial_index=trial_index,
            raw_data={"ctr": (outcome["ctr_mean"], outcome["ctr_sem"])},  # Mean and SEM
        )
        print(f"  Trial {trial_index} completed and added to GP")
        trials_completed += 1

    # CheckBudget: stop early if the convergence check says so.
    if check_convergence(ax_client):
        print(f"\nConverged after {trials_completed} trials")
        break

print("\n" + banner)
print("OPTIMIZATION COMPLETE")
print(f"Total trials: {trials_completed}")
print(banner)

## Step 6: Select Best Configuration and Deploy

In [None]:
# Step 6: Select the best configuration found by the optimizer.
print("\n[Step 5] Select Best Configuration")
best_result = ax_client.get_best_parameters()
if best_result is None:
    # get_best_parameters() has an Optional return; fail with a clear message
    # instead of an opaque unpacking error.
    raise RuntimeError("No best parameters available; run the optimization loop first.")
best_parameters, metrics = best_result

print(f"\nBest configuration found:")
for param, value in best_parameters.items():
    print(f"  {param}: {value}")

# `metrics` is a (means, covariances) pair: means maps metric name -> predicted
# mean, and covariances maps metric name -> {metric name -> covariance}. The
# diagonal entry covariances['ctr']['ctr'] is therefore a VARIANCE, so take its
# square root before printing it as a "±" uncertainty next to the mean.
if metrics is None:
    print("\nExpected CTR: unavailable (no model predictions returned)")
else:
    means, covariances = metrics
    if covariances is None:
        print(f"\nExpected CTR: {means['ctr']:.4f}")
    else:
        ctr_std = np.sqrt(covariances['ctr']['ctr'])
        print(f"\nExpected CTR: {means['ctr']:.4f} ± {ctr_std:.4f}")

# Step 7: Train the final production model with the winning hyperparameters.
print("\n[Step 6] Train Final Production Model")
final_model = train_model_with_config(best_parameters)
deploy_to_production(final_model)

## Step 7: Visualize Optimization Progress

In [None]:
import os

import matplotlib.pyplot as plt

# Get optimization trace: one row per trial, with the 'ctr' metric and the
# hyperparameter values as columns.
trials_df = ax_client.get_trials_data_frame()

# Create figure with a 2x2 grid of subplots
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Bayesian Optimization Results', fontsize=16, fontweight='bold')

# Plot 1: CTR over trials, with the best observed CTR as a reference line
ax1 = axes[0, 0]
ax1.plot(trials_df.index, trials_df['ctr'], 'o-', alpha=0.6, label='Observed CTR')
ax1.axhline(y=trials_df['ctr'].max(), color='r', linestyle='--', label='Best CTR', alpha=0.5)
ax1.set_xlabel('Trial Number')
ax1.set_ylabel('CTR')
ax1.set_title('CTR Performance Over Trials')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Best CTR so far (cumulative max)
ax2 = axes[0, 1]
best_so_far = trials_df['ctr'].cummax()
ax2.plot(best_so_far.index, best_so_far, 'g-', linewidth=2)
ax2.fill_between(best_so_far.index, best_so_far, alpha=0.3)
ax2.set_xlabel('Trial Number')
ax2.set_ylabel('Best CTR So Far')
ax2.set_title('Optimization Progress (Cumulative Best)')
ax2.grid(True, alpha=0.3)

# Plot 3: Learning rate exploration (points coloured by trial order)
ax3 = axes[1, 0]
scatter = ax3.scatter(trials_df['learning_rate'], trials_df['ctr'],
                      c=trials_df.index, cmap='viridis', s=100, alpha=0.6, edgecolors='black')
ax3.set_xscale('log')
ax3.set_xlabel('Learning Rate (log scale)')
ax3.set_ylabel('CTR')
ax3.set_title('Learning Rate vs CTR')
ax3.grid(True, alpha=0.3)
plt.colorbar(scatter, ax=ax3, label='Trial Number')

# Plot 4: Max depth exploration (points coloured by trial order)
ax4 = axes[1, 1]
scatter2 = ax4.scatter(trials_df['max_depth'], trials_df['ctr'],
                       c=trials_df.index, cmap='viridis', s=100, alpha=0.6, edgecolors='black')
ax4.set_xlabel('Max Depth')
ax4.set_ylabel('CTR')
ax4.set_title('Max Depth vs CTR')
ax4.grid(True, alpha=0.3)
plt.colorbar(scatter2, ax=ax4, label='Trial Number')

plt.tight_layout()

# Save the figure. Create the target directory first: savefig raises if the
# directory does not exist, which would abort the cell before plt.show().
output_path = '../Images/ch17_bayesian_optimization_results.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"Figure saved to: {output_path}")

plt.show()

print("\n" + "="*80)
print("VISUALIZATION COMPLETE")
print("="*80)

## Step 8: Analysis and Validation

Let's check whether Bayesian Optimization performed better than random search given the same trial budget.

In [None]:
# Compare with random search baseline
print("\n" + "="*80)
print("COMPARISON: Bayesian Optimization vs Random Search")
print("="*80)

# Bayesian Optimization results.
# Score every BO trial with the noise-free synthetic objective. The 'ctr'
# column of trials_df holds noisy A/B observations, and taking the max of noisy
# values is biased upward; comparing that against noise-free random-search
# scores would not be like-for-like.
bo_true_ctrs = [
    simulate_true_objective({
        'learning_rate': lr,
        'max_depth': depth,
        'min_samples_split': samples,
        'optimizer': opt,
    })
    for lr, depth, samples, opt in zip(
        trials_df['learning_rate'],
        trials_df['max_depth'],
        trials_df['min_samples_split'],
        trials_df['optimizer'],
    )
]
bo_best_ctr = max(bo_true_ctrs)
bo_trials = len(bo_true_ctrs)
bo_avg_ctr = np.mean(bo_true_ctrs)

# Simulate random search with same budget (seeded for a repeatable baseline)
np.random.seed(42)
random_ctrs = []
for _ in range(bo_trials):
    random_params = {
        # Sample log-uniformly over [1e-4, 1e-1] so the baseline draws from the
        # same log-scaled learning-rate range that the BO search space uses.
        'learning_rate': 10 ** np.random.uniform(-4, -1),
        'max_depth': np.random.randint(3, 11),  # upper bound exclusive -> 3..10
        'min_samples_split': np.random.randint(2, 21),  # 2..20
        'optimizer': np.random.choice(['adam', 'sgd', 'rmsprop'])
    }
    random_ctrs.append(simulate_true_objective(random_params))

random_best_ctr = max(random_ctrs)
random_avg_ctr = np.mean(random_ctrs)

print("\n(All CTRs below are noise-free values of the synthetic objective.)")

print(f"\nBayesian Optimization:")
print(f"  Trials: {bo_trials}")
print(f"  Best CTR: {bo_best_ctr:.4f}")
print(f"  Average CTR: {bo_avg_ctr:.4f}")

print(f"\nRandom Search (baseline):")
print(f"  Trials: {bo_trials}")
print(f"  Best CTR: {random_best_ctr:.4f}")
print(f"  Average CTR: {random_avg_ctr:.4f}")

# Relative improvements in percent. The objective is floored at 0.01, so the
# denominators are never zero.
improvement = ((bo_best_ctr - random_best_ctr) / random_best_ctr) * 100
avg_improvement = ((bo_avg_ctr - random_avg_ctr) / random_avg_ctr) * 100

print(f"\nImprovement:")
print(f"  Best CTR improvement: {improvement:+.2f}%")
print(f"  Average CTR improvement: {avg_improvement:+.2f}%")

if improvement > 0:
    print(f"\n✓ Bayesian Optimization found a better solution than random search!")
else:
    print(f"\n⚠ Random search performed better (may need more trials or tuning)")

# Check if we found near-optimal solution: evaluate the objective at its
# documented peak and measure how far the best BO trial falls short of it.
optimal_ctr = simulate_true_objective({
    'learning_rate': 0.02,
    'max_depth': 5,
    'min_samples_split': 6,
    'optimizer': 'adam'
})

optimality_gap = ((optimal_ctr - bo_best_ctr) / optimal_ctr) * 100

print(f"\nOptimality Analysis:")
print(f"  True optimal CTR (known): {optimal_ctr:.4f}")
print(f"  Found CTR: {bo_best_ctr:.4f}")
print(f"  Optimality gap: {optimality_gap:.2f}%")

if optimality_gap < 5:
    print(f"\n✓ Found near-optimal solution (within 5% of true optimum)!")
else:
    print(f"\n⚠ Solution is {optimality_gap:.1f}% away from optimum (may need more trials)")