## Reference Notebook

I borrowed sample code from these amazing notebooks:

[Santa 2020: epsilon-greedy starter](https://www.kaggle.com/xhlulu/santa-2020-epsilon-greedy-starter)

[Santa 2020 Starter](https://www.kaggle.com/isaienkov/santa-2020-starter)

The purpose of this notebook is to figure out a good decay value for the agent.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Preparing environment

In [None]:
!pip install kaggle-environments --upgrade -q

In [None]:
from kaggle_environments import make


# Decay = 0

In [None]:
%%writefile epsilon_greedy_0.py

import math

# State kept between successive calls to ``agent``.
last_bandit = -1  # arm returned by the previous call; -1 means "no pull yet"
total_reward = 0  # cumulative reward that has already been credited to an arm

sums_of_reward = None  # per-arm sum of credited rewards (list built at step 0)
numbers_of_selections = None  # per-arm pull counts (list built at step 0)


def agent(observation, configuration):
    """Pick the arm with the best average reward, without decay.

    Arms that have never been pulled score infinity, so each one is tried
    before any average is compared. The arm pulled on the previous step is
    skipped; if no other arm scores above zero, arm 0 is returned.

    Args:
        observation: object exposing ``step`` (current step index) and
            ``reward`` (running total of this agent's reward).
        configuration: mapping exposing ``"banditCount"``.

    Returns:
        Index of the arm to pull on this step.
    """
    global sums_of_reward, numbers_of_selections, last_bandit, total_reward

    bandit_count = configuration["banditCount"]

    if observation.step == 0:
        # Start every episode from a clean slate, including the bookkeeping
        # scalars, so a reused module does not leak state between episodes.
        numbers_of_selections = [0] * bandit_count
        sums_of_reward = [0] * bandit_count
        last_bandit = -1
        total_reward = 0

    if last_bandit > -1:
        # observation.reward is cumulative: the difference to what has been
        # credited so far is the payout of the previous pull.
        reward = observation.reward - total_reward
        sums_of_reward[last_bandit] += reward
        total_reward += reward

    # Freeze the previous pick before scanning; the exclusion below must be
    # tested against it, not against the candidate found so far.
    previous_bandit = last_bandit

    bandit = 0
    best_score = 0
    for i in range(bandit_count):
        if numbers_of_selections[i] > 0:
            score = sums_of_reward[i] / numbers_of_selections[i]
        else:
            score = float("inf")  # force one pull of every untried arm
        if score > best_score and i != previous_bandit:
            best_score = score
            bandit = i

    # Record the arm that is actually returned (also when the fallback arm 0
    # is used) so the next reward is credited to the right arm.
    last_bandit = bandit
    numbers_of_selections[bandit] += 1

    return bandit

# Various Decay

In [None]:
%%writefile epsilon_greedy_decaying.py

#decay_rates = [0.99, 0.98, 0.97, 0.96, 0.95]

# State kept between successive calls to ``agent``.
last_bandit = -1  # arm returned by the previous call; -1 means "no pull yet"
total_reward = 0  # cumulative reward that has already been credited to an arm

sums_of_reward = None  # per-arm sum of credited rewards (list built at step 0)
numbers_of_selections = None  # per-arm pull counts (list built at step 0)


def agent(observation, configuration):
    """Pick the arm with the best decay-weighted average reward.

    Arms that have never been pulled score infinity, so each one is tried
    before any average is compared. For tried arms the average reward is
    multiplied by a decay factor that shrinks with the arm's pull count.
    The arm pulled on the previous step is skipped; if no other arm scores
    above zero, arm 0 is returned.

    Args:
        observation: object exposing ``step`` (current step index) and
            ``reward`` (running total of this agent's reward).
        configuration: mapping exposing ``"banditCount"``.

    Returns:
        Index of the arm to pull on this step.
    """
    global sums_of_reward, numbers_of_selections, last_bandit, total_reward

    bandit_count = configuration["banditCount"]

    if observation.step == 0:
        # Start every episode from a clean slate, including the bookkeeping
        # scalars, so a reused module does not leak state between episodes.
        numbers_of_selections = [0] * bandit_count
        sums_of_reward = [0] * bandit_count
        last_bandit = -1
        total_reward = 0

    if last_bandit > -1:
        # observation.reward is cumulative: the difference to what has been
        # credited so far is the payout of the previous pull.
        reward = observation.reward - total_reward
        sums_of_reward[last_bandit] += reward
        total_reward += reward

    # Freeze the previous pick before scanning; the exclusion below must be
    # tested against it, not against the candidate found so far.
    previous_bandit = last_bandit

    # Counts the already-tried arms met so far in this scan (the previous arm
    # included), so it is the 1-based rank of arm i among the tried arms.
    tried_rank = 0

    bandit = 0
    best_score = 0
    for i in range(bandit_count):
        pulls = numbers_of_selections[i]
        if pulls > 0:
            tried_rank += 1
            # NOTE(review): ``**`` binds tighter than ``*``, so this is
            # 0.99 * ((0.001 * rank) ** pulls), and the base depends on the
            # arm's position among tried arms. Kept numerically as it was;
            # confirm this is the intended decay.
            decay = 0.99 * ((0.001 * tried_rank) ** pulls)
            score = decay * sums_of_reward[i] / pulls
        else:
            score = float("inf")  # force one pull of every untried arm
        if score > best_score and i != previous_bandit:
            best_score = score
            bandit = i

    # Record the arm that is actually returned (also when the fallback arm 0
    # is used) so the next reward is credited to the right arm.
    last_bandit = bandit
    numbers_of_selections[bandit] += 1

    return bandit


    

In [None]:
# Play one "mab" episode between the two agent files listed below, then
# render the finished episode inline in the notebook.
competitors = ["../input/santa-2020/submission.py", "epsilon_greedy_decaying.py"]
env = make("mab", debug=True)
env.run(competitors)
env.render(mode="ipython", width=800, height=500)