In [None]:
!pip install kaggle-environments --upgrade -q

In [None]:
%%writefile epsilon_greedy.py

import math
import random
import numpy as np

# Fix the seed of the global RNG so the exploration draws repeat between runs.
random.seed(2020)

# Probability of pulling a uniformly random bandit instead of the greedy one.
epsilon = 0.15

# Episode state shared across calls to `agent`; the list-valued entries are
# placeholders here and get (re)built inside `agent`.
Q = rewards = n_chosen = last_action = None
total_reward = 0

# agent
def agent(observation, configuration):
    """Choose a bandit with an epsilon-greedy policy over running mean rewards.

    State lives in module-level globals between calls. ``observation.reward``
    is treated as a cumulative total: the payoff of the previous pull is
    recovered as its difference from the total recorded so far.

    Args:
        observation: object exposing ``step`` and ``reward``.
        configuration: object exposing ``banditCount``.

    Returns:
        int: index of the bandit to pull, in ``[0, banditCount)``.
    """
    global Q, rewards, total_reward, n_chosen, last_action

    bandit_count = configuration.banditCount

    # initialize (also when state was never set up, e.g. first call is not step 0)
    if observation.step == 0 or Q is None:
        n_chosen = [0] * bandit_count
        rewards = [0] * bandit_count
        Q = [0] * bandit_count
        total_reward = 0
        # No bandit has been pulled yet; nothing may be credited on this call.
        last_action = None

    # update values
    if last_action is None:
        # First call of an episode: only synchronise the cumulative-reward
        # baseline. Crediting an arm here would record a pull that never
        # happened and permanently bias that arm's running mean.
        total_reward = observation.reward
    else:
        r = observation.reward - total_reward
        n_chosen[last_action] += 1
        # Incremental mean: Q_n = Q_{n-1} + (r - Q_{n-1}) / n
        Q[last_action] += (r - Q[last_action]) / n_chosen[last_action]
        total_reward += r
        rewards[last_action] += r

    # epsilon-greedy selection
    if random.random() < epsilon:
        # explore: uniformly random bandit, with probability epsilon
        action = random.randint(0, bandit_count - 1)
    else:
        # exploit: bandit with the highest estimate (np.argmax returns the
        # first index on ties)
        action = int(np.argmax(Q))

    last_action = action

    return action

In [None]:
from kaggle_environments import make
# Create the "mab" environment used by the cells that follow.
# NOTE(review): debug=True presumably surfaces agent errors/output — confirm in the kaggle-environments docs.
env = make("mab", debug=True)

In [None]:
# Reset the environment before running an episode.
env.reset()
# Run the environment with two agent files: the first path points into the
# competition input folder (presumably an existing submission — verify it exists),
# the second is the epsilon_greedy.py file written by the %%writefile cell.
env.run(["../input/santa-2020/submission.py", "epsilon_greedy.py"])
# Render the result inline in the notebook at 800x500.
env.render(mode="ipython", width=800, height=500)