# Multi-Armed Bandit Comparison

Compare the Thompson Sampling, UCB1, and Epsilon-Greedy algorithms on a simulated A/B test.

In [None]:
import sys
sys.path.append('..')

from bandits.algorithms import simulate_test
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

## Scenario: Testing 2 Variants

- Variant A: 10% true conversion rate
- Variant B: 12% true conversion rate
- 10,000 trials per algorithm

In [None]:
# Ground-truth conversion rates, one per variant (index 0 is Variant A,
# index 1 is Variant B), and the number of trials given to each algorithm.
true_rates = [0.10, 0.12]
n_trials = 10000

# Algorithm identifiers forwarded to simulate_test via its `algorithm` keyword.
algorithms = ['thompson', 'ucb', 'epsilon']

# One simulation result per algorithm, keyed by the algorithm identifier.
results = {
    alg: simulate_test(true_rates, n_trials, algorithm=alg)
    for alg in algorithms
}

# The status marker was previously stored as mis-decoded UTF-8 ("âœ…");
# it is restored to the intended check-mark character.
print("✅ Simulations complete")

## Results Comparison

In [None]:
# Print a short report for each algorithm: regret (absolute and percent),
# the recorded pulls, and the arm reported under the 'best_arm' key.
for name in algorithms:
    stats = results[name]
    print(
        f"\n{name.upper()}:",
        f"  Regret: {stats['regret']:.0f} ({stats['regret_pct']:.2f}%)",
        f"  Pulls: {stats['pulls']}",
        f"  Detected best: Variant {stats['best_arm']}",
        sep="\n",
    )

## Visualization

In [None]:
# Build a two-panel summary figure for every algorithm in `algorithms`:
# cumulative regret on the left, per-variant traffic allocation on the right.
# The figure is saved to ../assets/algorithm_comparison.png and then shown.
from pathlib import Path

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Regret comparison
regrets = [results[alg]['regret'] for alg in algorithms]
axes[0].bar(algorithms, regrets)
axes[0].set_title('Cumulative Regret')
axes[0].set_ylabel('Regret')

# Traffic allocation: grouped bars, one group per algorithm, with the two
# variants offset half a bar-width either side of the group centre.
x = np.arange(len(algorithms))
width = 0.35
pulls_a = [results[alg]['pulls'][0] for alg in algorithms]
pulls_b = [results[alg]['pulls'][1] for alg in algorithms]
axes[1].bar(x - width / 2, pulls_a, width, label='Variant A')
axes[1].bar(x + width / 2, pulls_b, width, label='Variant B')
axes[1].set_title('Traffic Allocation')
axes[1].set_ylabel('Pulls')
axes[1].set_xticks(x)
axes[1].set_xticklabels(algorithms)
axes[1].legend()

plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
output_path = Path('../assets/algorithm_comparison.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.show()

## Conclusion

Thompson Sampling typically achieves:
- The lowest regret of the three algorithms
- Faster convergence to the best variant
- A natural exploration–exploitation balance

UCB1 is deterministic and comes with theoretical regret guarantees.
Epsilon-Greedy is simple but typically less efficient, because a fixed share of its traffic is spent exploring at random.