The aim of this notebook is to evaluate which agent performs best by playing the agents against each other in randomly paired matches and comparing their rewards.


In [None]:
!pip install kaggle-environments==1.7.2 -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from glob import glob
import itertools
from tqdm.notebook import tqdm_notebook as tqdm

import random

In [None]:
# Number of randomly paired matches played by the evaluation loop.
N_MATCHES = 100

In [None]:
from kaggle_environments.envs.rps.agents import *
from kaggle_environments import make

# Shared 'mab' environment (debug mode on); match() resets and reuses it
# for every game.
env = make('mab', debug = True)

In [None]:
# Collect every agent file found anywhere below the parent directory,
# in sorted path order.
submission_paths = glob('../**/submission.py', recursive=True)
sample_agent_paths = glob('../**/sample_agent.py', recursive=True)
agents = sorted(submission_paths + sample_agent_paths)
agents

In [None]:
# Display names for the agents. They are paired positionally with the sorted
# `agents` paths when the name -> path mapping is built, so the order here
# must match that sorted order.
agent_names = [
    'Default UCB with decay', 
    'Santa 2020: epsilon-greedy with decay', 
    'Santa 2020 Starter', 
    'Santa 2020'
]

In [None]:
# Map each display name to its agent file. Names are paired positionally with
# the sorted paths in `agents`, so both sequences must have the same length:
# zip() would otherwise silently drop the surplus, leaving agents mislabelled
# or missing (and causing a KeyError later when a match looks them up).
if len(agent_names) != len(agents):
    raise ValueError(
        "agent_names has {} entries but {} agent files were found: {}".format(
            len(agent_names), len(agents), agents
        )
    )
agent_dict = dict(zip(agent_names, agents))

In [None]:
def match(agent0, agent1):
    """Play one game between two named agents and record the outcome.

    Resets the shared ``env``, runs the two agent files looked up in
    ``agent_dict``, then updates the module-level ``result_df`` (cumulative
    reward, games played, average score) and appends each agent's reward to
    its list in ``scores``.

    Args:
        agent0: Key into ``agent_dict`` for the first player.
        agent1: Key into ``agent_dict`` for the second player.

    Returns:
        None. Results are recorded in ``result_df`` and ``scores``.
    """
    import warnings

    env.reset()
    env.run([agent_dict[agent0], agent_dict[agent1]])

    # Named `episode` rather than `json` so the stdlib module name is not
    # shadowed inside this function.
    episode = env.toJSON()
    rewards = episode['rewards']

    for name, reward in zip((agent0, agent1), rewards):
        if reward is None:
            # A missing reward (presumably an agent that errored or timed
            # out; confirm against kaggle-environments) cannot be summed or
            # fed to np.median later, so skip it instead of aborting the run.
            warnings.warn(
                "No reward reported for agent {!r}; result not recorded".format(name)
            )
            continue

        # cumulative reward and number of games with a valid result
        result_df.at[name, 'cum_reward'] += reward
        result_df.at[name, 'games'] += 1
        scores[name].append(reward)

    # average reward (NaN for agents that have not completed a game yet)
    result_df['score'] = result_df['cum_reward'] / result_df['games']

In [None]:
# Per-agent running totals (all starting at zero) and the raw list of
# per-match rewards for each agent.
result_df = pd.DataFrame(
    data=np.zeros(shape=(len(agent_names), 3)),
    index=agent_names,
    columns=['score', 'cum_reward', 'games'],
)
scores = {name: [] for name in agent_names}

In [None]:
# Play N_MATCHES games, each between two distinct agents drawn at random.
for _ in range(N_MATCHES):
    first, second = np.random.choice(agent_names, size=2, replace=False)
    match(first, second)

In [None]:
# Which model works better?
# Compare the distribution of per-match rewards: one box per agent,
# ordered from the highest to the lowest median reward.

fig, axes = plt.subplots(figsize=(12, 8))

median_dic = {name: np.median(rewards) for name, rewards in scores.items()}
# Ascending sort followed by an in-place reversal (keeps the same tie order
# as slicing the sorted list with [::-1]).
sorted_models = sorted(median_dic, key=median_dic.get)
sorted_models.reverse()

ordered_rewards = tuple(scores[name] for name in sorted_models)
plt.boxplot(ordered_rewards)
axes.set_xticklabels(sorted_models)

# Overlay the individual rewards on each box, with a little horizontal
# "jitter" so that overlapping points stay visible.
for idx, rewards in enumerate(ordered_rewards):
    jittered_x = np.random.normal(idx + 1, 0.05, size=len(rewards))
    plt.plot(jittered_x, rewards, '.', color="C{}".format(idx))

plt.xticks(rotation=90)
plt.show()