The initial code is from isaienkov.

In [None]:
%%writefile random_agent.py

import random

def random_agent(observation, configuration):
    """Choose a bandit uniformly at random.

    The observation is ignored; only ``configuration.banditCount`` is read.
    Returns an integer index in ``[0, configuration.banditCount)``.
    """
    n_bandits = configuration.banditCount
    choice = random.randrange(n_bandits)
    return choice

In [None]:
%%writefile ucb_agent.py

import math

# Per-episode agent state. Everything here is re-initialised at step 0.
last_bandit = -1   # index pulled on the previous step, -1 before the first pull
total_reward = 0   # value of observation.reward seen on the previous step

sums_of_reward = None          # per-bandit sum of credited rewards
numbers_of_selections = None   # per-bandit pull count
reward_hist = []               # per-step reward increments, in order


def ucb_agent(observation, configuration):
    """Select a bandit with the UCB1 rule.

    Each bandit is scored as ``mean_reward + sqrt(2 * ln(step + 1) / pulls)``;
    a bandit that has never been pulled scores ``+inf`` so it is tried first.
    The highest-scoring bandit is returned (lowest index wins ties).

    Args:
        observation: object exposing ``step`` (0 on the first call of an
            episode) and ``reward``, which is treated as a running total; the
            increment since the previous call is credited to the bandit
            chosen on that call.
        configuration: object exposing ``banditCount``.

    Returns:
        int: index of the chosen bandit.
    """
    global sums_of_reward, numbers_of_selections, last_bandit, total_reward
    global reward_hist

    n_bandits = configuration.banditCount

    # Start every episode from a clean slate. Without resetting last_bandit
    # and total_reward, a second episode run in the same process would credit
    # a bogus (stale) reward difference to the previous episode's last pull.
    if observation.step == 0 or numbers_of_selections is None:
        numbers_of_selections = [0] * n_bandits
        sums_of_reward = [0] * n_bandits
        last_bandit = -1
        total_reward = 0
        reward_hist = []

    if last_bandit > -1:
        reward = observation.reward - total_reward
        sums_of_reward[last_bandit] += reward
        total_reward = observation.reward
        reward_hist.append(reward)

    def upper_bound(arm):
        pulls = numbers_of_selections[arm]
        if pulls == 0:
            return math.inf
        average_reward = sums_of_reward[arm] / pulls
        delta = math.sqrt(2 * math.log(observation.step + 1) / pulls)
        return average_reward + delta

    # Plain argmax over the upper bounds. The previous version compared each
    # index against `last_bandit` while also overwriting `last_bandit` inside
    # this loop, which in practice only ever excluded bandit 0 right after it
    # had been played.
    bandit = 0
    best_bound = -math.inf
    for arm in range(n_bandits):
        bound = upper_bound(arm)
        if bound > best_bound:
            best_bound = bound
            bandit = arm

    numbers_of_selections[bandit] += 1
    # Remember the pull only once the choice is final, so the next call
    # credits its reward to the bandit that was actually played.
    last_bandit = bandit

    return bandit

In [None]:
!pip install kaggle-environments --upgrade

In [None]:
import matplotlib.pyplot as plt
# Line colours, assigned to agents in iteration order and reused cyclically.
colors_db = ['g', 'b', 'r', 'c', 'm', 'y', 'k']


def plot_final_rewards(hist):
    """Plot each agent's final reward per episode plus its mean as a dashed line.

    Args:
        hist: mapping from agent name (str) to a sequence of final rewards,
            one entry per episode.

    The episode count shown in the title is the length of the last agent's
    sequence in ``hist``.
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having already executed `import numpy as np`.
    import numpy as np

    num_episodes = 0

    plt.figure(figsize=(12, 8))
    for i, (agent, rewards) in enumerate(hist.items()):
        # Cycle through the palette so more agents than colours cannot
        # raise IndexError.
        color = colors_db[i % len(colors_db)]
        plt.plot(rewards, label=agent, color=color)
        num_episodes = len(rewards)
        avg_final_reward = np.array(rewards).mean()
        plt.plot(
            [0, num_episodes - 1],
            [avg_final_reward, avg_final_reward],
            label=agent + ' avg.',
            color=color,
            linestyle='dashed',
        )

    plt.legend(bbox_to_anchor=(1.2, 0.5))
    plt.xlabel("Iterations")
    plt.ylabel("Final Reward")
    plt.title("Final Agent Rewards for " + str(num_episodes) + " Episodes")
    plt.show()

In [None]:
from kaggle_environments import make
from collections import defaultdict
from tqdm import tqdm
import numpy as np

In [None]:
# Benchmark: random agent against the UCB agent, one fresh "mab"
# environment per episode; collect each agent's final reward.
num_trails = 10
hist = defaultdict(list)

for i in tqdm(range(num_trails)):
    env = make("mab")
    env.run(["random_agent.py", "ucb_agent.py"])
    final_states = env.state
    hist['random_agent'].append(final_states[0]['reward'])
    hist['ucb_agent'].append(final_states[1]['reward'])

plot_final_rewards(hist)

# UCB vs Bayesian

In [None]:
%%writefile bayesian_sub.py

import numpy as np
from scipy.stats import beta

# Per-episode agent state. Everything here is re-initialised at step 0.
post_a = None      # per-bandit Beta posterior "a" parameter (1 + credited reward)
post_b = None      # per-bandit Beta posterior "b" parameter (1 + credited 1 - reward)
bandit = None      # index pulled on the previous step
total_reward = 0   # value of observation.reward seen on the previous step

# Multiplier applied to the posterior standard deviation in the selection
# score. NOTE(review): 21 is the value the original code used; confirm it is
# the intended tuning.
EXPLORATION_WEIGHT = 21


def agent(observation, configuration):
    """Select a bandit by a Beta-posterior upper bound.

    Each bandit keeps a Beta(post_a, post_b) posterior, starting at
    Beta(1, 1). The reward increment ``r`` since the previous call is added
    to ``post_a`` and ``1 - r`` to ``post_b`` for the bandit chosen on that
    call. The bandit maximising
    ``posterior mean + EXPLORATION_WEIGHT * posterior std`` is returned
    (lowest index wins ties).

    Args:
        observation: object exposing ``step`` (0 on the first call of an
            episode) and ``reward``, which is treated as a running total.
        configuration: object exposing ``banditCount``.

    Returns:
        int: index of the chosen bandit.
    """
    global total_reward, bandit, post_a, post_b

    n_bandits = configuration.banditCount

    if observation.step == 0 or post_a is None:
        post_a = np.ones(n_bandits)
        post_b = np.ones(n_bandits)
        # Reset the reward baseline too. Otherwise a second episode in the
        # same process computes a negative increment on its first update,
        # which drives post_a to 0 and the score to NaN.
        total_reward = 0
        bandit = None
    else:
        r = observation.reward - total_reward
        total_reward = observation.reward

        post_a[bandit] += r
        post_b[bandit] += (1 - r)

    posterior_mean = post_a / (post_a + post_b)
    bound = posterior_mean + beta.std(post_a, post_b) * EXPLORATION_WEIGHT
    bandit = int(np.argmax(bound))

    return bandit

In [None]:
# Benchmark: Bayesian agent against the UCB agent, one fresh "mab"
# environment per episode; collect each agent's final reward.
num_trails = 10
hist = defaultdict(list)

for i in tqdm(range(num_trails)):
    env = make("mab")
    env.run(["bayesian_sub.py", "ucb_agent.py"])
    final_states = env.state
    hist['bayesian_sub'].append(final_states[0]['reward'])
    hist['ucb_agent'].append(final_states[1]['reward'])

plot_final_rewards(hist)

In [None]:
# Longer benchmark: Bayesian agent against the UCB agent over 100 episodes,
# reusing a single "mab" environment that is reset before every run.
num_trails = 100
hist = defaultdict(list)
env = make("mab")

for i in tqdm(range(num_trails)):
    env.reset()
    env.run(["bayesian_sub.py", "ucb_agent.py"])
    final_states = env.state
    hist['bayesian_sub'].append(final_states[0]['reward'])
    hist['ucb_agent'].append(final_states[1]['reward'])

plot_final_rewards(hist)

## The performance of the UCB and Bayesian agents is nearly identical