# Agent Pool Play

Take a list of local agents, play every pair against each other a configurable number of times, and store the results. This helps us tell how well our agent is doing and whether a new version is an improvement.

In [3]:
#!pip3 install pandas

In [1]:
import pandas as pd
import datetime as dt
import os
import itertools
import time
from kaggle_environments import make
from kaggle_environments.envs.football.helpers import *
from math import sqrt

# Folder that holds the agent submissions; entries of `agents` are relative to it.
AGENTS_DIR = '../submit_agents/'
# Folder under which each run creates its own timestamped result directory.
EXPORT_DIR = 'pool_play_results/'
# Number of times the complete set of pairings is played.
NUM_POOL_PLAYS = 1

# Agent scripts taking part in the pool, given relative to AGENTS_DIR.
agents = [
    'tunable-baseline-bot/submission_v6.py',
    'best-open-rules-bot/submission_v2.py',
    'gfootball-with-memory-patterns/submission_v15.py',
    'gfootball-with-memory-patterns/submission_v28.py',
    'smart-control-strategy/submission_v3.py',
]

# Configuration handed to the "football" environment when it is created.
env_config = dict(
    save_video=False,
    scenario_name="11_vs_11_kaggle",
    running_in_notebook=True,
    episodeSteps=100,
)


def run_pool_play():
    """Play every pair of configured agents against each other and save the results.

    Each unordered pair taken from ``agents`` is played ``NUM_POOL_PLAYS`` times
    in a "football" environment created from ``env_config``. The first agent of
    a pair is passed to ``env.run`` first and recorded as the left side. For
    every match the final score, reward and status of both sides are recorded.

    Two files are written to the directory ``EXPORT_DIR/<dd-mm-YYYY-HH-MM-SS>``
    (created if missing, including ``EXPORT_DIR`` itself):

    * ``results.csv`` -- one row per match.
    * ``config.csv``  -- the ``env_config`` entries plus ``num_pool_plays``.

    Progress is reported with a start message and a completion message that
    includes the elapsed wall-clock seconds.

    Returns:
        None.
    """
    print('run start')
    start_time = time.time()

    env = make("football", configuration=env_config, debug=False)

    result_columns = [
        'round',
        'left_agent', 'right_agent',
        'left_score', 'right_score',
        'left_reward', 'right_reward',
        'left_status', 'right_status',
    ]
    records = []

    for pool_play_round in range(NUM_POOL_PLAYS):
        for left_name, right_name in itertools.combinations(agents, 2):
            env.reset()
            output = env.run([AGENTS_DIR + left_name, AGENTS_DIR + right_name])

            # Only the last step is needed: it carries the final reward/status
            # of each agent and the final score in the raw observation.
            final_output = output[-1]
            left_final, right_final = final_output[0], final_output[1]
            score = left_final['observation']['players_raw'][0]['score']

            records.append({
                'round': pool_play_round,
                'left_agent': left_name,
                'right_agent': right_name,
                'left_score': score[0],
                'right_score': score[1],
                'left_reward': left_final['reward'],
                'right_reward': right_final['reward'],
                'left_status': left_final['status'],
                'right_status': right_final['status'],
            })

    # Passing the columns explicitly keeps the header in results.csv even when
    # no match was played (fewer than two agents or NUM_POOL_PLAYS == 0).
    fdf = pd.DataFrame(records, columns=result_columns)

    # make export directory w/ timestamp of runs
    curr_time = dt.datetime.now().strftime('%d-%m-%Y-%H-%M-%S')
    export_fdir = os.path.join(EXPORT_DIR, curr_time)
    # makedirs also creates EXPORT_DIR on the first run; exist_ok avoids
    # discarding finished matches if the directory is already there.
    os.makedirs(export_fdir, exist_ok=True)

    # write out results
    fdf.to_csv(os.path.join(export_fdir, 'results.csv'), index=False)

    # write out config (DataFrame.append no longer exists in pandas >= 2.0,
    # so the extra row is added before the frame is built)
    config_rows = list(env_config.items()) + [('num_pool_plays', NUM_POOL_PLAYS)]
    config_df = pd.DataFrame(config_rows)
    config_df.to_csv(os.path.join(export_fdir, 'config.csv'), index=False)

    end_time = round((time.time() - start_time), 2)
    print("complete: --- %s seconds ---" % end_time)

In [2]:
# Run the whole pool; results.csv and config.csv are written under EXPORT_DIR.
run_pool_play()

run start
complete: --- 10.06 seconds ---


## Test

In [16]:
# NOTE(review): `output` is only assigned at notebook level by the env.run cell
# further down, so this cell fails on a fresh top-to-bottom run.
# Reads `steps_left` from the first player's raw observation in the final step.
output[-1][0]['observation']['players_raw'][0]['steps_left']

2002

In [17]:
# NOTE(review): depends on `output` from the env.run cell further down.
# Reads the `score` pair from the final step; run_pool_play stores index 0 as
# left_score and index 1 as right_score.
output[-1][0]['observation']['players_raw'][0]['score']

[1, 1]

In [147]:
# Scratch check of two timestamp layouts for naming a run.
curr_datetime = dt.datetime.now()
# Compact digits-only id with microseconds, taken from its own clock reading.
run_timestamp_id = format(dt.datetime.now(), '%Y%m%d%H%M%S%f')
# Day-first, dash-separated layout with second resolution.
atimestamp = format(curr_datetime, '%d-%m-%Y-%H-%M-%S')
print(atimestamp)

26-10-2020-04-40-44


In [52]:
%%time
env = make("football", configuration=env_config, debug=True)
output = env.run([agents_dirs[0], agents_dirs[1]])

Staring a new environment f42e1e81-d9bc-4935-849c-81e3bc22d2c8: with scenario: 11_vs_11_kaggle
Resetting environment f42e1e81-d9bc-4935-849c-81e3bc22d2c8: with scenario: 11_vs_11_kaggle
CPU times: user 43.3 s, sys: 1.9 s, total: 45.2 s
Wall time: 43.3 s


In [53]:
# Number of entries in the sequence returned by env.run for the match above.
len(output)

3002

Capture the final-step reward, status and info for each agent

In [54]:
# Take the last entry of the match output and split it into the two agents'
# records (index 0 is treated as the left agent, index 1 as the right agent).
final_output = output[-1]
left_agent_foutput, right_agent_foutput = final_output[0], final_output[1]

# Reward, status and info of each side, read in the same order as before.
left_agent_reward, right_agent_reward = (
    left_agent_foutput['reward'],
    right_agent_foutput['reward'],
)
left_agent_status, right_agent_status = (
    left_agent_foutput['status'],
    right_agent_foutput['status'],
)
left_agent_info, right_agent_info = (
    left_agent_foutput['info'],
    right_agent_foutput['info'],
)

In [55]:
# One-row summary of the debug match: who played and how each side finished.
adf = pd.DataFrame({
    'left_agent': [agents[0]],
    'right_agent': [agents[1]],
    'left_agent_reward': [left_agent_reward],
    'right_agent_reward': [right_agent_reward],
    'left_agent_status': [left_agent_status],
    'right_agent_status': [right_agent_status],
})

In [56]:
# Display the one-row summary frame for the debug match.
adf

Unnamed: 0,left_agent,right_agent,left_agent_reward,right_agent_reward,left_agent_status,right_agent_status
0,tunable-baseline-bot/submission_v6.py,best-open-rules-bot/submission_v2.py,-1,1,DONE,DONE
