References:
* [Santa 2020 starter](https://www.kaggle.com/isaienkov/santa-2020-starter/): Re-used writefile magic command and make_env function for creating a simulation.  

In [None]:
!pip install kaggle-environments --upgrade -q

## Thompson Sampling

Based on Lilian Weng's blog post, [The Multi-Armed Bandit Problem and Its Solutions](https://lilianweng.github.io/lil-log/2018/01/23/the-multi-armed-bandit-problem-and-its-solutions.html).

In [None]:
%%writefile thompson.py

import numpy as np

# Per-bandit Beta posterior parameters; (re)initialised to all ones at step 0.
post_a = None
post_b = None
# Index of the bandit this agent pulled on its previous call.
bandit = None
# Value of observation.reward seen on the previous call; the per-step reward
# is recovered as the difference between consecutive observations.
total_reward = 0
# NOTE(review): `c` is not used by `agent` below; kept so the module's
# top-level names are unchanged.
c = 3


def agent(observation, configuration):
    """Pick a bandit by Thompson sampling over per-bandit Beta posteriors.

    State is kept in module-level globals between calls. On step 0 the
    posteriors are reset to Beta(1, 1) for every bandit and the running
    reward total is cleared, so a reused module starts each episode clean.
    On later steps the reward delta since the previous call updates the
    posterior of the bandit chosen last time.

    Args:
        observation: object exposing ``step`` and ``reward``; ``reward`` is
            treated as a running total across steps.
        configuration: object exposing ``banditCount``.

    Returns:
        int: index of the bandit to pull on this step.
    """
    global total_reward, bandit, post_a, post_b

    n_bandits = configuration.banditCount

    if observation.step == 0:
        # Fresh episode: uniform prior and no reward accumulated yet. Without
        # resetting total_reward, a second episode in the same process would
        # compute a negative first delta and push post_a to <= 0, which
        # np.random.beta rejects.
        post_a = np.ones(n_bandits)
        post_b = np.ones(n_bandits)
        total_reward = 0
    else:
        # Reward earned by the previous pull.
        r = observation.reward - total_reward
        total_reward = observation.reward

        # Update Beta posterior of the bandit pulled last step
        post_a[bandit] += r
        post_b[bandit] += (1 - r)

    # Draw one sample per bandit from its posterior and play the best draw.
    samples = np.random.beta(post_a, post_b)
    bandit = int(np.argmax(samples))

    return bandit

## Simulations

In [None]:
from kaggle_environments import make

# Create the "mab" environment with debug output enabled; reused by the
# single-episode simulation in the next cell.
env = make("mab", debug=True)

In [None]:
# Play one episode: ../input/santa-2020/submission.py against thompson.py.
env.run(["../input/santa-2020/submission.py", "thompson.py"])
# Show the episode using the "ipython" render mode at 800x500.
env.render(mode="ipython", width=800, height=500)

## 5-round comparison

In [None]:
def print_rounds(file1, file2, N=5):
    """Play ``N`` episodes of the "mab" environment and print each final score.

    Args:
        file1: agent passed as the first player to ``env.run``.
        file2: agent passed as the second player to ``env.run``.
        N: number of episodes to play (default 5).

    Prints one line per episode in the form ``Round k: <score1> - <score2>``,
    where the scores are the ``'reward'`` entries of the last step.
    """
    arena = make("mab", debug=True)

    for round_no in range(1, N + 1):
        arena.run([file1, file2])
        # The last recorded step holds one state dict per player.
        final_state = arena.steps[-1]
        first_total = final_state[0]['reward']
        second_total = final_state[1]['reward']
        arena.reset()
        print(f"Round {round_no}: {first_total} - {second_total}")

In [None]:
# Head-to-head: ../input/santa-2020/submission.py (labelled 'Default') against
# thompson.py, using print_rounds' default number of rounds.
print('Default vs Thompson Sampling')
print_rounds("../input/santa-2020/submission.py", "thompson.py")

In [None]:
# Head-to-head: the bayesian_ucb.py agent from another input dataset against
# thompson.py, using print_rounds' default number of rounds.
print('Bayesian UCB vs Thompson Sampling')
print_rounds("../input/santa-2020-ucb-and-bayesian-ucb-starter/bayesian_ucb.py", "thompson.py")

In [None]:
# Head-to-head: the epsilon_greedy_decay.py agent from another input dataset
# against thompson.py, using print_rounds' default number of rounds.
print('epsilon-greedy+decay vs Thompson Sampling')
print_rounds("../input/santa-2020-epsilon-greedy-starter/epsilon_greedy_decay.py", "thompson.py")