# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pandas as pd
from matplotlib.dates import DateFormatter
import datetime

# Set random seed for reproducibility: this seeds NumPy's global RNG, which
# generate_shipment_data uses (np.random.choice / np.random.binomial) to pick
# holiday positions and to draw the binary outcomes.
np.random.seed(42)

# ----------------------
# 1. Enhanced Simulated Data Generation (Highlighting Holiday Effects)
# ----------------------
def generate_shipment_data(n_periods=24*14, base_rate=0.3, trend_strength=0.0005,
                          holiday_effect=0.5, n_holidays=5):
    """Generate hourly order fulfillment data with pronounced holiday anomalies.

    The success probability follows a linear trend
    (``base_rate + trend_strength * hour``, clipped to [0.1, 0.8]).
    ``n_holidays`` distinct hours are drawn at random; each of them, together
    with the hour immediately before and after it, has ``holiday_effect``
    added to its probability (the result is clipped to [0.2, 0.9]).

    Randomness comes from NumPy's global RNG, so seed ``np.random`` before
    calling for reproducible output.

    Args:
        n_periods: Number of hourly periods to simulate.
        base_rate: Success probability at the first period.
        trend_strength: Per-hour increment of the success probability.
        holiday_effect: Amount added to the probability of affected hours.
        n_holidays: Number of distinct holiday hours to draw.

    Returns:
        A 4-tuple ``(timestamps, outcomes, probs, holiday_indices)``:
        an array of ``datetime`` objects (one per hour, starting
        2025-11-16 00:00), the 0/1 outcomes, the probabilities used to draw
        them, and the indices of the drawn holiday hours (centre hours only,
        without the neighbouring hours).

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``n_holidays``
            exceeds ``n_periods``.
    """
    # Hourly timestamps from a fixed start date
    start_time = datetime.datetime(2025, 11, 16, 0, 0)
    timestamps = [start_time + datetime.timedelta(hours=i) for i in range(n_periods)]

    # Base probabilities with a linear trend
    time_indices = np.arange(n_periods)
    base_probs = np.clip(base_rate + trend_strength * time_indices, 0.1, 0.8)

    # Pick the holiday hours, then extend each one to the hour before/after
    # (simulating pre/post holiday fluctuations).
    holiday_indices = np.random.choice(n_periods, n_holidays, replace=False)
    neighbours = (holiday_indices[:, None] + np.array([-1, 0, 1])).ravel()
    in_range = (neighbours >= 0) & (neighbours < n_periods)
    # np.unique ensures an hour that neighbours several holidays receives the
    # effect once instead of having it stacked for every adjacent holiday.
    affected = np.unique(neighbours[in_range])
    base_probs[affected] = np.clip(base_probs[affected] + holiday_effect, 0.2, 0.9)

    # One Bernoulli draw per hour
    outcomes = np.random.binomial(n=1, p=base_probs, size=n_periods)

    return np.array(timestamps), outcomes, base_probs, holiday_indices

# Generate data with significant anomalies (14 days of hourly periods).
# n_periods is passed explicitly so the generated arrays always have the same
# length as the n_periods value used later to build the holiday mask.
n_periods = 24 * 14
timestamps, outcomes, true_probs, holiday_indices = generate_shipment_data(
    n_periods=n_periods,
    holiday_effect=0.4,  # Strong holiday effect
)

# ----------------------
# 2. Model Definitions
# ----------------------
# 2.1 Beta-Binomial Rolling Update Model
def beta_binomial_model(data, initial_alpha=2, initial_beta=5, window_size=1,
                        time_index=None):
    """Sequentially update a Beta posterior with one binary outcome per step.

    Starting from a Beta(``initial_alpha``, ``initial_beta``) prior, each
    observation ``data[i]`` is treated as a single Bernoulli trial and folded
    into the posterior. The statistics recorded for step ``i`` are computed
    *after* that update, so they already include observation ``i``.

    Args:
        data: Sequence of 0/1 outcomes, one per period.
        initial_alpha: Prior pseudo-count of successes.
        initial_beta: Prior pseudo-count of failures.
        window_size: Accepted for interface compatibility; currently unused.
        time_index: Optional sequence of labels (e.g. timestamps), at least as
            long as ``data``, stored in the ``timestamp`` column. When omitted
            the module-level ``timestamps`` array is used.

    Returns:
        ``pandas.DataFrame`` with one row per observation and the columns
        ``timestamp``, ``pred`` (posterior mean), ``lower_ci`` and
        ``upper_ci`` (bounds of the central 95% posterior interval).
    """
    # Fall back to the script-level timestamps only when no index is supplied
    stamps = timestamps if time_index is None else time_index

    alpha, beta = initial_alpha, initial_beta
    rows = []
    for i, k in enumerate(data):
        # Conjugate update for a single trial: success -> alpha, failure -> beta
        alpha += k
        beta += 1 - k

        lower_ci, upper_ci = stats.beta.interval(0.95, alpha, beta)
        rows.append({
            'timestamp': stamps[i],
            'pred': alpha / (alpha + beta),
            'lower_ci': lower_ci,
            'upper_ci': upper_ci
        })

    return pd.DataFrame(rows)

# 2.2 Traditional Sliding Window Model (simulating simple deep learning behavior)
def sliding_window_model(data, window_size=24*3, time_index=None):
    """Predict each period as the mean of the preceding ``window_size`` outcomes.

    The default window is 3 days of hourly data, simulating the fixed-window
    behaviour of a traditional model. The prediction for step ``i`` uses only
    observations strictly before ``i``; while fewer than ``window_size``
    observations exist, all of them are used, and the very first step (no
    history at all) defaults to 0.5.

    Args:
        data: Sequence of 0/1 outcomes, one per period.
        window_size: Maximum number of preceding observations to average.
        time_index: Optional sequence of labels (e.g. timestamps), at least as
            long as ``data``, stored in the ``timestamp`` column. When omitted
            the module-level ``timestamps`` array is used.

    Returns:
        ``pandas.DataFrame`` with one row per observation and the columns
        ``timestamp``, ``pred``, ``lower_ci`` and ``upper_ci``. The interval
        is not a statistical one: it is a fixed band of +/-0.15 around the
        prediction, clamped to [0, 1].
    """
    # Fall back to the script-level timestamps only when no index is supplied
    stamps = timestamps if time_index is None else time_index

    rows = []
    for i in range(len(data)):
        history = data[max(0, i - window_size):i]

        # No history yet -> neutral default; otherwise the window average
        pred = np.mean(history) if len(history) > 0 else 0.5

        # Traditional models rarely provide credible intervals; simulate one
        # with a fixed width.
        rows.append({
            'timestamp': stamps[i],
            'pred': pred,
            'lower_ci': max(0, pred - 0.15),
            'upper_ci': min(1, pred + 0.15)
        })

    return pd.DataFrame(rows)

# ----------------------
# 3. Model Training and Prediction
# ----------------------
# Run both models on the same observed outcomes: the Bayesian (Beta-Binomial)
# model first, then the traditional sliding window baseline.
bayes_results, traditional_results = (
    beta_binomial_model(outcomes),
    sliding_window_model(outcomes),
)

# ----------------------
# 4. Result Evaluation
# ----------------------
def calculate_mae(preds, truths):
    """Calculate the Mean Absolute Error between predictions and true values.

    Both inputs are converted to float NumPy arrays before subtracting, so
    values are compared position by position. This lets plain Python lists be
    passed, and keeps two pandas Series with different indexes from being
    aligned by label (which would produce NaN).

    Args:
        preds: Array-like of predicted values.
        truths: Array-like of true values, same length as ``preds``.

    Returns:
        The mean of ``|preds - truths|`` as a float.
    """
    pred_values = np.asarray(preds, dtype=float)
    true_values = np.asarray(truths, dtype=float)
    return np.mean(np.abs(pred_values - true_values))

# Overall MAE of each model against the simulated true probabilities
bayes_mae, traditional_mae = (
    calculate_mae(model_frame['pred'], true_probs)
    for model_frame in (bayes_results, traditional_results)
)

# Holiday period MAE: same metric restricted to the holiday hours, to
# evaluate performance during anomalies
holiday_mask = np.isin(np.arange(n_periods), holiday_indices)
bayes_holiday_mae, traditional_holiday_mae = (
    calculate_mae(model_frame.loc[holiday_mask, 'pred'], true_probs[holiday_mask])
    for model_frame in (bayes_results, traditional_results)
)

# ----------------------
# 5. Visualization Comparison
# ----------------------
plt.figure(figsize=(16, 10))

# Plot true probabilities and the raw binary observations
plt.plot(timestamps, true_probs, 'k--', label='True Probability', alpha=0.8)
plt.scatter(timestamps, outcomes, color='gray', alpha=0.2, s=10, label='Observations')

# Mark holidays with red dotted vertical lines (linestyle ':' is dotted, so
# the annotation text says "dotted" to match what is drawn)
for idx in holiday_indices:
    plt.axvline(x=timestamps[idx], color='red', alpha=0.3, linestyle=':')
plt.text(timestamps[0], 0.95, 'Red dotted lines: Holidays', color='red', alpha=0.7)

# Plot each model's prediction line and its interval band:
# Bayesian model in blue first, then the traditional model in green.
for model_frame, line_format, band_color, line_label in (
    (bayes_results, 'b-', 'blue', 'Beta-Binomial Prediction'),
    (traditional_results, 'g-', 'green', 'Sliding Window Prediction'),
):
    plt.plot(model_frame['timestamp'], model_frame['pred'], line_format, label=line_label)
    plt.fill_between(
        model_frame['timestamp'],
        model_frame['lower_ci'],
        model_frame['upper_ci'],
        color=band_color, alpha=0.1
    )

# Performance metrics annotation (axes coordinates: bottom-left corner)
metric_text = (f"Overall MAE: Bayesian={bayes_mae:.3f}, Traditional={traditional_mae:.3f}\n"
               f"Holiday MAE: Bayesian={bayes_holiday_mae:.3f}, Traditional={traditional_holiday_mae:.3f}")
plt.text(0.02, 0.02, metric_text, transform=plt.gca().transAxes,
         bbox=dict(facecolor='white', alpha=0.8), fontsize=10)

# Plot aesthetics
plt.title('Comparison of Traditional vs Beta-Binomial Models During Anomalous Periods', fontsize=14)
plt.xlabel('Time', fontsize=12)
plt.ylabel('Fulfillment Probability', fontsize=12)
plt.ylim(0, 1)
plt.legend()
plt.grid(alpha=0.3)
plt.gca().xaxis.set_major_formatter(DateFormatter('%m-%d'))
plt.gcf().autofmt_xdate()
plt.tight_layout()
plt.show()

# Print evaluation results (plain strings where there is nothing to format)
print("Overall Prediction MAE:")
print(f"Beta-Binomial Model: {bayes_mae:.4f}")
print(f"Traditional Sliding Window Model: {traditional_mae:.4f}\n")

print("Holiday Period Prediction MAE:")
print(f"Beta-Binomial Model: {bayes_holiday_mae:.4f}")
print(f"Traditional Sliding Window Model: {traditional_holiday_mae:.4f}")