References:

* Santa 2020 starter: Re-used writefile magic command and make_env function for creating a simulation.

In [None]:
!pip install kaggle-environments --upgrade -q

In [None]:
%%writefile agent.py

import random

def random_agent(observation, configuration):
    """Pick a bandit uniformly at random.

    Args:
        observation: Unused; accepted so the function matches the agent signature.
        configuration: Object exposing ``banditCount``, the number of bandits.

    Returns:
        A random integer in ``[0, configuration.banditCount)``.
    """
    n_arms = configuration.banditCount
    choice = random.randrange(n_arms)
    return choice

In [None]:
%%writefile always_first_agent.py

def always_first(observation, configuration):
    """Baseline agent that ignores its inputs and always selects bandit 0.

    Args:
        observation: Unused.
        configuration: Unused.

    Returns:
        The constant index ``0``.
    """
    first_bandit = 0
    return first_bandit

In [None]:
%%writefile sample_agent.py

import math

# Index of the arm pulled on the previous step; -1 means no arm has been pulled yet.
last_bandit = -1
# Portion of observation.reward (treated as a running total) already credited to arms.
total_reward = 0

# Per-arm statistics; (re)allocated on step 0 of every episode.
sums_of_reward = None
numbers_of_selections = None


def ucb_agent(observation, configuration):
    """Choose a bandit with the UCB1 rule.

    Each arm is scored as ``mean_reward + sqrt(2 * ln(step + 1) / pulls)``;
    arms that have never been pulled score ``+inf`` so each is tried once.
    Ties go to the lowest index.

    All per-episode state (pull counts, reward sums, the previously pulled arm
    and the credited reward total) is reset when ``observation.step == 0``, so
    the function stays correct if the module remains loaded across episodes.

    Args:
        observation: Object exposing ``step`` (current step index) and
            ``reward`` (this agent's cumulative reward so far).
        configuration: Object exposing ``banditCount``.

    Returns:
        Index of the bandit to pull on this step.
    """
    global sums_of_reward, numbers_of_selections, last_bandit, total_reward

    bandit_count = configuration.banditCount

    if observation.step == 0:
        numbers_of_selections = [0] * bandit_count
        sums_of_reward = [0] * bandit_count
        # Forget the previous episode, otherwise the first reward delta of the
        # new episode would be computed against a stale running total.
        last_bandit = -1
        total_reward = 0

    if last_bandit > -1:
        # observation.reward is cumulative: the increase since the last call
        # is the payout of the arm pulled on the previous step.
        reward = observation.reward - total_reward
        sums_of_reward[last_bandit] += reward
        total_reward += reward

    # Loop-invariant numerator of the exploration term.
    log_term = 2 * math.log(observation.step + 1)

    def upper_bound(arm):
        pulls = numbers_of_selections[arm]
        if pulls == 0:
            return math.inf
        return sums_of_reward[arm] / pulls + math.sqrt(log_term / pulls)

    # The previous arm is recorded only after the scan is complete. Updating it
    # while scanning made the old "not the same arm twice" guard apply to arm 0
    # only, so the guard is dropped in favour of plain UCB1.
    bandit = max(range(bandit_count), key=upper_bound)
    numbers_of_selections[bandit] += 1
    last_bandit = bandit

    return bandit

In [None]:
%%writefile bay_sub.py

import numpy as np
from scipy.stats import beta

# Beta posterior parameters per bandit (alpha = post_a, beta = post_b);
# allocated on step 0 of every episode.
post_a = None
post_b = None
# Arm pulled on the previous step.
bandit = None
# Value of observation.reward seen on the previous step.
total_reward = 0


def agent(observation, configuration):
    """Choose a bandit with a Bayesian upper confidence bound.

    Every arm keeps a Beta(post_a, post_b) posterior. The score of an arm is
    the posterior mean plus four posterior standard deviations, and the arm
    with the highest score is pulled (lowest index on ties).

    Posterior parameters and the reward baseline are reset when
    ``observation.step == 0`` so a new episode never sees stale state.

    Args:
        observation: Object exposing ``step`` and the cumulative ``reward``.
        configuration: Object exposing ``banditCount``.

    Returns:
        Index of the bandit to pull, as a plain ``int``.
    """
    global total_reward, bandit, post_a, post_b

    n_bandits = configuration.banditCount

    if observation.step == 0:
        # Beta(1, 1) prior for every arm.
        post_a = np.ones(n_bandits)
        post_b = np.ones(n_bandits)
        total_reward = 0
    else:
        # Increase of the cumulative reward = payout of the previous pull.
        r = observation.reward - total_reward
        total_reward = observation.reward

        post_a[bandit] += r
        post_b[bandit] += (1 - r)

    bound = post_a / (post_a + post_b) + beta.std(post_a, post_b) * 4
    bandit = int(np.argmax(bound))

    return bandit

In [None]:
from kaggle_environments import make

# Create the "mab" environment with debug=True.
env = make("mab", debug=True)

# Play sample_agent.py against bay_sub.py, then render the game inline
# (mode="ipython") in an 800x800 area.
env.run(["sample_agent.py", "bay_sub.py"])
env.render(mode="ipython", width=800, height=800)

In [None]:
# Print a label for the cell output, then play the santa-2020 submission.py
# against bay_sub.py on the existing env and render the game inline.
print('Sant 2020')
env.run(["../input/santa-2020/submission.py", "bay_sub.py"])
env.render(mode="ipython", width=800, height=500)

In [None]:
%%writefile ucb_decay.py

import numpy as np

# Factor applied to every observed reward before it is added to an arm's sum.
decay = 0.97
# Value of observation.reward seen on the previous step.
total_reward = 0
# Arm pulled on the previous step.
bandit = None


def agent(observation, configuration):
    """Choose a bandit with a vectorised UCB rule on down-weighted rewards.

    Pull counts and reward sums start at ``1e-32`` instead of 0 so the
    divisions below are defined for arms that have not been pulled yet.
    Each observed reward is multiplied by ``decay`` before being credited.

    Per-episode state, including the reward baseline, is reset when
    ``observation.step == 0`` so a new episode never sees stale state.

    Args:
        observation: Object exposing ``step`` and the cumulative ``reward``.
        configuration: Object exposing ``banditCount``.

    Returns:
        Index of the bandit to pull, as a plain ``int``.
    """
    global reward_sums, n_selections, total_reward, bandit

    n_bandits = configuration.banditCount

    if observation.step == 0:
        # Two rows of one array: row 0 -> pull counts, row 1 -> reward sums.
        n_selections, reward_sums = np.full((2, n_bandits), 1e-32)
        total_reward = 0
    else:
        # Increase of the cumulative reward = payout of the previous pull.
        reward_sums[bandit] += decay * (observation.reward - total_reward)
        total_reward = observation.reward

    avg_reward = reward_sums / n_selections
    delta_i = np.sqrt(2 * np.log(observation.step + 1) / n_selections)
    bandit = int(np.argmax(avg_reward + delta_i))

    n_selections[bandit] += 1

    return bandit

In [None]:
%%writefile bayesian_ucb.py

import numpy as np
from scipy.stats import beta

# Beta posterior parameters per bandit and the arm pulled on the previous step;
# the posteriors are allocated on step 0 of every episode.
post_a, post_b, bandit = [None] * 3
# Value of observation.reward seen on the previous step.
total_reward = 0
# Number of posterior standard deviations added to the posterior mean.
c = 3


def agent(observation, configuration):
    """Choose a bandit with a Bayesian upper confidence bound.

    Every arm keeps a Beta(post_a, post_b) posterior. The score of an arm is
    the posterior mean plus ``c`` posterior standard deviations, and the arm
    with the highest score is pulled (lowest index on ties).

    Posterior parameters and the reward baseline are reset when
    ``observation.step == 0`` so a new episode never sees stale state.

    Args:
        observation: Object exposing ``step`` and the cumulative ``reward``.
        configuration: Object exposing ``banditCount``.

    Returns:
        Index of the bandit to pull, as a plain ``int``.
    """
    global total_reward, bandit, post_a, post_b

    if observation.step == 0:
        # Beta(1, 1) prior for every arm.
        post_a, post_b = np.ones((2, configuration.banditCount))
        total_reward = 0
    else:
        # Increase of the cumulative reward = payout of the previous pull.
        r = observation.reward - total_reward
        total_reward = observation.reward
        # Update the Beta posterior of the arm that was pulled.
        post_a[bandit] += r
        post_b[bandit] += 1 - r

    bound = post_a / (post_a + post_b) + beta.std(post_a, post_b) * c
    bandit = int(np.argmax(bound))

    return bandit

In [None]:
# Reset the existing env, then play the santa-2020 submission.py against
# ucb_decay.py. The game is not rendered in this cell.
env.reset()
env.run(["../input/santa-2020/submission.py", "ucb_decay.py"])

In [None]:
# Reset the existing env, play the santa-2020 submission.py against
# bayesian_ucb.py, and render the game inline.
env.reset()
env.run(["../input/santa-2020/submission.py", "bayesian_ucb.py"])
env.render(mode="ipython", width=800, height=500)

In [None]:
def print_rounds(file1, file2, N=5):
    """Play several "mab" games between two agent files and print the scores.

    A single environment is created with ``debug=True`` and reused; it is
    reset after each game. For every game one line is printed with the
    ``'reward'`` entries of both agents taken from the last recorded step.

    Args:
        file1: Path of the first agent file.
        file2: Path of the second agent file.
        N: Number of games to play (default 5).
    """
    env = make("mab", debug=True)

    for round_no in range(1, N + 1):
        env.run([file1, file2])
        final_states = env.steps[-1]
        p1_score = final_states[0]['reward']
        p2_score = final_states[1]['reward']
        env.reset()
        print(f"Round {round_no}: {p1_score} - {p2_score}")

In [None]:
# Print a heading, then play the santa-2020 submission.py against ucb_decay.py
# for print_rounds' default number of rounds (N=5).
print('Default vs UCB+decay')
print_rounds("../input/santa-2020/submission.py", "ucb_decay.py")

In [None]:
# Print a heading, then play the santa-2020 submission.py against
# bayesian_ucb.py for print_rounds' default number of rounds (N=5).
print('Default vs BayesianUCB')
print_rounds("../input/santa-2020/submission.py", "bayesian_ucb.py")