In [168]:
%load_ext autoreload
%autoreload 2
%pylab inline

import sys
import glob
import pandas as pd
import os
import seaborn as sns
from tqdm import tqdm_notebook as tqdm
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import pickle
import re
import json

import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool, ColumnDataSource, ColorBar, LogColorMapper, LinearColorMapper
from bokeh.transform import linear_cmap, log_cmap
from bokeh.palettes import Category10_10

output_notebook()


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [197]:
EXPERIMENT = 'dec13_big_game_3'
MODEL = 'lstm_deep'
EXPERIMENT_DIR = f'/shared/outputs/{EXPERIMENT}/{MODEL}'
GAME_CONFIG = f'{EXPERIMENT_DIR}/game.json'
with open(GAME_CONFIG, 'r') as f:
    PLAYERS = range(len(json.load(f)['players']))

In [198]:
# 1) Get all reward files
reward_files = glob.glob(EXPERIMENT_DIR + f'/evaluations/rewards_*.pkl')

In [199]:
# 2) Group them by checkpoint name (TODO: Update for new formats without horrible re)
pattern = re.compile(r'checkpoint_(\d+).*')
groups = [int(re.match(pattern, os.path.basename(reward_file).split('rewards_')[1].split('.pkl')[0]).groups()[0]) for reward_file in reward_files]
df = pd.DataFrame({'fname': reward_files, 'iteration': groups})
records = []
for iteration, iteration_df in df.groupby('iteration'):
    record = dict(t=iteration)
    
    relevant_files = iteration_df['fname'].values
    for reward_file in relevant_files:
        with open(reward_file, 'rb') as f:
            rewards = pickle.load(f)

        for player, values in rewards['rewards'].items():
            br_agent = rewards['br_agent']
            if br_agent is None:
                key = str(player)
            else:
                key = f'{player}_{br_agent}'
            record[key] = np.array(values).mean()
    
    records.append(record)
    
ev_df = pd.DataFrame.from_records(records)

In [200]:
# Regret for not having played the best response
for p in PLAYERS:
    ev_df[f"Regret{p}"] = ev_df[f"{p}_{p}"] - ev_df[f"{p}"]
regret_cols = [f'Regret{p}' for p in PLAYERS]
ev_df['ApproxNashConv'] = ev_df[regret_cols].clip(0).sum(axis='columns')

In [201]:
ev_df

Unnamed: 0,t,0_1,1_1,0,1,0_0,1_0,Regret0,Regret1,ApproxNashConv
0,300000,5.120249,481.482574,187.358347,191.510252,480.37909,6.901101,293.020743,289.972322,582.993065
1,600000,5.195392,481.12202,186.011809,194.237351,477.741887,5.55839,291.730078,286.884669,578.614747
2,900000,7.656522,475.227065,194.853035,185.484187,477.243323,7.326344,282.390288,289.742878,572.133166
3,1200000,5.576247,477.062507,191.348362,191.98825,475.600877,7.286809,284.252515,285.074257,569.326772
4,1500000,6.570773,471.89771,193.919778,193.548481,470.241552,8.305496,276.321773,278.349229,554.671002
5,1800000,6.173847,475.286966,193.387186,192.242114,474.538457,5.911412,281.151271,283.044852,564.196123
6,2100000,,,198.17359,195.511806,,,,,0.0
7,2400000,,,200.449152,195.900814,,,,,0.0


In [202]:
source = ColumnDataSource(ev_df) # Need to drop tensors b/c of serialization issues
       
color = Category10_10.__iter__()


plot = figure(width=900, height=400, title=f"{MODEL} Approximate Nash Conv")

# add a circle renderer with a size, color, and alpha
for p in range(2):
    plot.line('t', f'Regret{p}', source=source, legend_label=f'Player {p} Regret', color=next(color))

plot.line('t', f'ApproxNashConv', source=source, legend_label=f'Approximate Nash Conv', color=next(color))
plot.legend.click_policy = "hide"
plot.xaxis.axis_label = 'Iteration'
plot.yaxis.axis_label = 'Regret'
plot.ray(x=[0], y=[0], length=0, angle=0, line_width=5, color=next(color))

plot.add_tools(HoverTool())
show(plot)


