In [1]:
import os
import sys
# Put the parent directory on the import path; the project packages imported
# further down (configs, environment, tuning, running) are presumably located
# there. Note that '..' is resolved relative to the current working directory.
sys.path.append('..')

In [2]:
import pandas as pd
from IPython.display import display
%config InlineBackend.figure_format='retina'

In [3]:
from configs.structure import Config, read_config
from environment.thieves_guardians_env import TGEnv
from tuning.comparison import read_models, play_all_pairs, compute_team_results, plot_team_aggregates, plot_boxplot
from running.evaluation import record_match

## Get results

In [4]:
# Read models from disk.
# The directory can be overridden with the COMPARISON_MODELS_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original hard-coded location is used unchanged.
MODELS_DIR = os.environ.get(
    'COMPARISON_MODELS_DIR',
    '/Users/stefan/Desktop/final comparisons/comparison-models',
)
# NOTE(review): the meaning of skip_start=18 is defined in
# tuning.comparison.read_models and is not visible from this notebook.
all_policies, model_names = read_models(MODELS_DIR, skip_start=18)
# Last expression: show how many models were loaded.
len(model_names)

48

In [5]:
# Preview the first five loaded model names.
model_names[:5]

['#15 - seed=2; threshold=0.8; lr=0 -- checkpoint-450',
 '#10 - seed=2; threshold=0.6; lr=0 -- checkpoint-150',
 '#4 - seed=1; threshold=0.66; mi=0.2 -- checkpoint-150',
 '#11 - seed=2; threshold=0.66; uniform=0.33 -- checkpoint-100',
 '#15 - seed=2; threshold=0.8; lr=0 -- checkpoint-250']

❗ Make sure all models in the models directory passed to `read_models` above were trained on the same environment as the one instantiated below:

In [10]:
# Re-instantiate the environment from its saved config.
# The config file location can be overridden with the COMPARISON_ENV_CONFIG
# environment variable; when it is unset, the original hard-coded path is used.
ENV_CONFIG_PATH = os.environ.get(
    'COMPARISON_ENV_CONFIG',
    '/Users/stefan/Desktop/final comparisons/comparison-env-config.json',
)
config = read_config(ENV_CONFIG_PATH)
# Override whatever time_limit the config file specifies for this comparison.
# NOTE(review): units are not visible here (presumably steps per episode) — confirm.
config.time_limit = 50
env = TGEnv(config)

---

Approximate wall-clock time of the all-pairs evaluation below:

- 5 models, 10 episodes: ~41 s
- 52 models, 10 episodes: ~52 min
- 48 models, 3 episodes: ~7 min

In [19]:
%%time
# Have all thieves play against all guardians
all_results = play_all_pairs(env, all_policies, model_names, num_episodes=3)

CPU times: user 43min 14s, sys: 12.7 s, total: 43min 26s
Wall time: 7min 16s


In [6]:
# Optional shortcut: uncomment the line below to reload previously saved
# results from CSV instead of re-running the all-pairs evaluation.
# all_results = pd.read_csv('gcp-3-results.csv')

In [20]:
# Save the all-pairs results to CSV (index=False: the row index is not written).
all_results.to_csv('gcp-3-results.csv', index=False)

## Plot results

In [21]:
# Show one per-model summary table (avg_rewards, winrate) for each team.
for team in ('thieves', 'guardians'):
    team_summary = plot_team_aggregates(all_results, team)
    aggregates_df, team_rewards, models_order = team_summary
    display(aggregates_df)
    # Box plot is left disabled; uncomment to draw it from team_rewards.
    # plot_boxplot(team_rewards, models_order, team)

Unnamed: 0_level_0,avg_rewards,winrate
thieves_model,Unnamed: 1_level_1,Unnamed: 2_level_1
#1 - seed=1; threshold=0.6; scripted=0.5 -- checkpoint-100,0.9,0.86
#12 - seed=2; threshold=0.66; mi=0.2 -- checkpoint-150,0.89,0.87
#9 - seed=2; threshold=0.6; scripted=0.5 -- checkpoint-100,0.87,0.84
#7 - seed=1; threshold=0.8; lr=0 -- checkpoint-100,0.85,0.81
#7 - seed=1; threshold=0.8; lr=0 -- checkpoint-250,0.83,0.76
#9 - seed=2; threshold=0.6; scripted=0.5 -- checkpoint-150,0.82,0.73
#3 - seed=1; threshold=0.66; uniform=0.33 -- checkpoint-250,0.73,0.6
#11 - seed=2; threshold=0.66; uniform=0.33 -- checkpoint-150,0.72,0.59
#6 - seed=1; threshold=0.7; lr=0 -- checkpoint-100,0.71,0.54
#14 - seed=2; threshold=0.7; lr=0 -- checkpoint-100,0.68,0.51


Unnamed: 0_level_0,avg_rewards,winrate
guardians_model,Unnamed: 1_level_1,Unnamed: 2_level_1
scripted,0.98,0.97
#9 - seed=2; threshold=0.6; scripted=0.5 -- checkpoint-100,0.87,0.041
#10 - seed=2; threshold=0.6; lr=0 -- checkpoint-250,0.71,0.52
#8 - seed=2 -- checkpoint-150,0.71,0.54
#13 - seed=2; threshold=0.7; scripted=0.5 -- checkpoint-100,0.69,0.5
#1 - seed=1; threshold=0.6; scripted=0.5 -- checkpoint-150,0.68,0.5
#7 - seed=1; threshold=0.8; lr=0 -- checkpoint-250,0.61,0.1
#2 - seed=1; threshold=0.6; lr=0 -- checkpoint-100,0.61,0.45
#11 - seed=2; threshold=0.66; uniform=0.33 -- checkpoint-100,0.61,0.36
#4 - seed=1; threshold=0.66; mi=0.2 -- checkpoint-150,0.6,0.29


## Film strategies

In [7]:
# Directory handed to record_match for the match videos.
# Override with the COMPARISON_VIDEOS_DIR environment variable; when it is
# unset, the original hard-coded location is used unchanged.
VIDEOS_DIR = os.environ.get(
    'COMPARISON_VIDEOS_DIR',
    '/Users/stefan/Desktop/final comparisons/videos',
)

In [None]:
%%time
record_match(
    VIDEOS_DIR, env, all_policies, model_names, 
    deterministic=False, n_sampling=3,
    selected_thief_name='#1 - seed=1; threshold=0.6; scripted=0.5 -- checkpoint-100',
    selected_guard_name='#10 - seed=2; threshold=0.6; lr=0 -- checkpoint-250',
)