# Keep pulling the same bandit as long as the reward keeps coming!

In [None]:
!pip install kaggle-environments --upgrade

In [None]:
%%writefile submission.py

import numpy as np
import pandas as pd
import random, os, datetime

# Cumulative reward seen in the previous observation; the agent subtracts it
# from the current cumulative reward to get the payoff of its last pull.
total_reward = 0
# Per-bandit counters keyed by bandit index, each a dict with the keys
# 'win', 'loss' and 'opp' (pulls made by the opponent).
bandit_dict = {}

def set_seed(my_seed=42):
    """Seed the random sources used by this module.

    Stores the seed (as text) in the PYTHONHASHSEED environment variable and
    reseeds both the stdlib ``random`` generator and NumPy's global generator.

    Args:
        my_seed: seed value handed to every generator (default 42).
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so this presumably only affects child processes -- confirm.
    seed_text = str(my_seed)
    os.environ['PYTHONHASHSEED'] = seed_text
    for reseed in (random.seed, np.random.seed):
        reseed(my_seed)

def get_next_bandit():
    """Return the index of the bandit with the highest heuristic score.

    For each entry of the module-level ``bandit_dict`` the score is

        (win - loss + opp - penalty) / (win + loss + opp)

    where ``penalty`` is 1.5 as soon as the opponent has pulled that bandit
    at least once and 0 otherwise.

    The search starts from negative infinity rather than 0, so the least bad
    bandit is still chosen when every score is zero or negative (previously
    bandit 0 was returned in that situation regardless of its own score).
    Ties are resolved in favour of the first bandit in iteration order.

    Returns:
        The key of the best-scoring bandit, or 0 when ``bandit_dict`` is
        empty.
    """
    best_bandit = 0
    best_score = float('-inf')
    for bandit, stats in bandit_dict.items():
        wins, losses, opp_pulls = stats['win'], stats['loss'], stats['opp']
        # Flat penalty applied once the opponent has touched this bandit.
        penalty = 1.5 if opp_pulls > 0 else 0.0
        score = (wins - losses + opp_pulls - penalty) / (wins + losses + opp_pulls)
        # Strict comparison keeps the earliest bandit on ties.
        if score > best_score:
            best_score = score
            best_bandit = bandit
    return best_bandit

def multi_armed_probabilities(observation, configuration):
    """Agent entry point: choose the bandit to pull on the current step.

    On step 0 the module-level state is (re)initialised and a randomly drawn
    bandit is returned. On every later step the reward gained since the
    previous call is credited as a win or a loss to this agent's last action,
    the opponent's last action is counted, and the bandit ranked best by
    ``get_next_bandit()`` is returned.

    Args:
        observation: mapping read through the keys 'step', 'reward',
            'agentIndex' and 'lastActions'.
        configuration: mapping read through the key 'banditCount'.

    Returns:
        Index of the bandit to pull.
    """
    global total_reward, bandit_dict

    # Drawn on every call, before any reseeding; only step 0 returns it.
    random_pull = random.randrange(configuration['banditCount'])

    if observation['step'] == 0:
        set_seed()
        total_reward = 0
        # Starting 'win' at 1 keeps each bandit's win+loss+opp total non-zero.
        bandit_dict = {
            arm: {'win': 1, 'loss': 0, 'opp': 0}
            for arm in range(configuration['banditCount'])
        }
        return random_pull

    # The observation carries the cumulative reward; the difference to the
    # stored total is what the previous pull paid out.
    cumulative = observation['reward']
    gained = cumulative - total_reward
    total_reward = cumulative

    me = observation['agentIndex']
    previous_actions = observation['lastActions']

    outcome = 'win' if gained > 0 else 'loss'
    bandit_dict[previous_actions[me]][outcome] += 1

    # `1 - me` addresses the other agent, which assumes exactly two agents
    # with indices 0 and 1.
    bandit_dict[previous_actions[1 - me]]['opp'] += 1

    return get_next_bandit()

In [None]:
%%writefile random_agent.py

import random

def random_agent(observation, configuration):
    """Baseline agent: pull a uniformly random bandit.

    Args:
        observation: unused.
        configuration: object exposing a ``banditCount`` attribute.

    Returns:
        A random integer in ``range(configuration.banditCount)``.
    """
    arm_count = configuration.banditCount
    return random.randrange(arm_count)

In [None]:
from kaggle_environments import make
# Create the "mab" environment (presumably multi-armed bandit) with debug
# enabled; the match cells below reuse this `env` object.
env = make("mab", debug=True)

In [None]:
import datetime

# Reset the shared environment before starting this match.
env.reset()
start_time = datetime.datetime.now()
# Run the random baseline agent file against the submission agent file.
env.run(["random_agent.py", "submission.py"])
stop_time = datetime.datetime.now()
# Report the elapsed wall-clock time of the run as a timedelta.
print('Completed sub vs random:', stop_time-start_time)
# Render the environment with the "ipython" mode at 800x400.
env.render(mode="ipython", width=800, height=400)

In [None]:
import datetime

# Reset the shared environment before starting this match.
env.reset()
start_time = datetime.datetime.now()
# Run the submission agent file against a second copy of itself.
env.run(["submission.py", "submission.py"])
stop_time = datetime.datetime.now()
# Report the elapsed wall-clock time of the run as a timedelta.
print('Completed sub vs sub:', stop_time-start_time)
# Render the environment with the "ipython" mode at 800x400.
env.render(mode="ipython", width=800, height=400)