In [134]:
%load_ext autoreload
%autoreload 2
%pylab inline

import sys
import glob
import pandas as pd
import os
import seaborn as sns
from tqdm import tqdm_notebook as tqdm
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import pickle
import re
import json

import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool, ColumnDataSource, ColorBar, LogColorMapper, LinearColorMapper
from bokeh.transform import linear_cmap, log_cmap
from bokeh.palettes import Category10_10

output_notebook()


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [135]:
# List the entries under /shared/outputs (the parent directory of EXPERIMENT_DIR).
!ls /shared/outputs

big_game	    dec22_big_game_3	 jan13_big_handcrafted
big_game_2	    dec7_big_game_2	 jan13_medium
debugging	    dec9_big_game_2	 jan13_medium_handcrafted
dec13_big_game_2    jan10_handcrafted_5  jan13_small
dec13_big_game_3    jan10_handcrafted_6  jan13_small_handcrafted
dec14_big_game_3    jan10_handcrafted_7  jan2_big_game_2
dec16_2_big_game_2  jan11_tiny		 jan2_big_game_3
dec16_2_big_game_3  jan11_tiny_2	 jan6_big_game_2
dec16_big_game_2    jan12_big_2		 jan7_exp
dec16_big_game_3    jan12_medium	 parameters2
dec17_big_game_2    jan12_medium_2	 signal
dec17_big_game_3    jan12_small_2	 transformerdebugging
dec22_big_game_2    jan13_big


In [159]:
# Pick the run to analyse; the paths below are derived from these two names.
# NOTE(review): 'jan13_tiny_2' does not appear in the `ls /shared/outputs` listing
# captured earlier in this notebook (it shows 'jan11_tiny_2') - confirm the directory.
EXPERIMENT = 'jan13_tiny_2'
MODEL = 'lstm_deep'
# MODEL = 'mlp-lowlr'
EXPERIMENT_DIR = f'/shared/outputs/{EXPERIMENT}/{MODEL}'
GAME_CONFIG = f'{EXPERIMENT_DIR}/game.json'

# PLAYERS is range(n), where n is the length of the 'players' entry in game.json.
with open(GAME_CONFIG, 'r') as config_file:
    game_spec = json.load(config_file)
PLAYERS = range(len(game_spec['players']))

In [160]:
# 1) Get all reward files.
# glob returns matches in a filesystem-dependent order, so sort them to make the
# order in which the files are later read reproducible from run to run.
reward_files = sorted(glob.glob(EXPERIMENT_DIR + '/evaluations/rewards_*.pkl'))

In [167]:
# 2) Group them by checkpoint name (TODO: Update for new formats without horrible re)
pattern = re.compile(r'checkpoint_(\d+).*')


def _checkpoint_iteration(reward_file):
    """Return the checkpoint number encoded in a reward file's name.

    The text between 'rewards_' and '.pkl' in the file's basename must start
    with 'checkpoint_<digits>'; the digits are returned as an int.

    Raises:
        ValueError: if that text does not start with 'checkpoint_<digits>'.
    """
    stem = os.path.basename(reward_file).split('rewards_')[1].split('.pkl')[0]
    match = pattern.match(stem)
    if match is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise ValueError(f'Cannot parse a checkpoint iteration from {reward_file!r}')
    return int(match.group(1))


def _reward_column(player, rewards):
    """Return the ev_df column name for `player`'s mean reward in one reward file.

    * '<player>_straightforward<k>' if rewards['straightforward_agent'] is k (not None)
    * '<player>'                    if rewards['br_agent'] is None
    * '<player>_<k>'                if rewards['br_agent'] is k
    """
    straightforward_agent = rewards.get('straightforward_agent')
    if straightforward_agent is not None:
        return f'{player}_straightforward{straightforward_agent}'
    br_agent = rewards['br_agent']
    if br_agent is None:
        return str(player)
    return f'{player}_{br_agent}'


groups = [_checkpoint_iteration(reward_file) for reward_file in reward_files]
df = pd.DataFrame({'fname': reward_files, 'iteration': groups})

# One record (row) per checkpoint iteration; each reward file of that iteration
# contributes one column per player holding the mean of that player's rewards.
records = []
for iteration, iteration_df in df.groupby('iteration'):
    record = dict(t=iteration)

    for reward_file in iteration_df['fname'].values:
        # NOTE: pickle.load can execute arbitrary code - only load trusted files.
        with open(reward_file, 'rb') as f:
            rewards = pickle.load(f)

        # If two files of the same iteration map to the same column, the one
        # read last overwrites the earlier value.
        for player, values in rewards['rewards'].items():
            record[_reward_column(player, rewards)] = np.array(values).mean()

    records.append(record)

ev_df = pd.DataFrame.from_records(records)

In [168]:
# Display the table of mean rewards: one row per checkpoint iteration `t`.
ev_df

Unnamed: 0,t,0_straightforward0,1_straightforward0,0,1,0_straightforward1,1_straightforward1,0_0,1_0,0_1,1_1
0,1000,100.0,0.0,0.0,100.0,0.0,100.0,,,,
1,2000,100.0,0.0,0.0,100.0,0.0,100.0,40.1584,39.8416,0.0,0.0
2,3000,100.0,0.0,0.0,100.0,0.0,100.0,0.0,100.0,0.0,100.0
3,4000,100.0,0.0,0.0,100.0,0.0,100.0,39.696,40.304,,
4,5000,100.0,0.0,0.0,100.0,0.0,100.0,,,,


In [165]:
# Per-player regret: how much more reward the player gets, on average, in the
# '<p>_<p>' and '<p>_straightforward<p>' evaluations than in the plain '<p>' one.
for player in PLAYERS:
    br_col = f"BRRegret{player}"
    sf_col = f'StraightforwardRegret{player}'
    ev_df[br_col] = ev_df[f"{player}_{player}"] - ev_df[f"{player}"]
    ev_df[sf_col] = ev_df[f"{player}_straightforward{player}"] - ev_df[f"{player}"]
    # Row-wise maximum of the two regrets (pandas skips NaN entries by default).
    ev_df[f'Regret{player}'] = ev_df[[br_col, sf_col]].max(axis=1)

# Sum over players of each player's regret, with negative regrets counted as 0.
regret_cols = [f'Regret{player}' for player in PLAYERS]
ev_df['ApproxNashConv'] = ev_df[regret_cols].clip(lower=0).sum(axis=1)

In [166]:
source = ColumnDataSource(ev_df)

# Colours are taken from the palette by index and wrap around when it runs out.
# The plot needs 2 colours per player plus 2, so walking the 10-entry palette
# with a plain iterator would raise StopIteration for more than 4 players.
palette = Category10_10

plot = figure(width=900, height=400, title=f"{MODEL} Approximate Nash Conv")

# (column in `source`, legend label) for every curve, in drawing order.
curves = []
for p in PLAYERS:
    curves.append((f'BRRegret{p}', f'Player {p} BR Regret'))
    curves.append((f'StraightforwardRegret{p}', f'Player {p} Straightforward Regret'))
curves.append(('ApproxNashConv', 'Approximate Nash Conv'))

for i, (column, label) in enumerate(curves):
    plot.line('t', column, source=source, legend_label=label, color=palette[i % len(palette)])

# Clicking a legend entry toggles the visibility of its curve.
plot.legend.click_policy = "hide"
plot.xaxis.axis_label = 'Iteration'
plot.yaxis.axis_label = 'Regret'
# Reference line at regret = 0: a ray from (0, 0) along the x direction.
# NOTE(review): length=0 is expected to mean "extend to the plot edge" - confirm
# against the Bokeh Ray glyph documentation for the installed version.
plot.ray(x=[0], y=[0], length=0, angle=0, line_width=5, color=palette[len(curves) % len(palette)])

plot.add_tools(HoverTool())
show(plot)




In [None]:
import matplotlib.pyplot as plt

# Heatmap of the first parameter of the Q-network's output layer.
# NOTE(review): `q_network` is not assigned in any live cell visible in this
# notebook (only inside the commented-out scratch cell) - define it before running.
plt.figure(figsize=(15, 5))
output_layer_params = list(q_network.output_layer.parameters())
plt.imshow(output_layer_params[0].data)
plt.colorbar()

In [None]:
# f = '/shared/outputs/dec13_big_game_3/lstm_deep/best_responses/checkpoint_1800000_br_0.pkl'
# a = pickle.load(open(f, 'rb'))
# from open_spiel.python.examples.ubc_nfsp_example import policy_from_checkpoint
# from open_spiel.python.examples.ubc_br import make_dqn_agent
# experiment_dir = '/shared/outputs/dec13_big_game_3/lstm_deep'
# env_and_model = policy_from_checkpoint(experiment_dir, checkpoint_suffix='checkpoint_1800000')
# game, policy, env, trained_agents, game_config = env_and_model.game, env_and_model.nfsp_policies, env_and_model.env, env_and_model.agents, env_and_model.game_config


# br_agent = make_dqn_agent(0, a['config'], env, game, game_config)
# br_agent._q_network.load_state_dict(a['agent'])

# rnn_output_list = []
# for i in range(3):
#     it = game.new_initial_state().child(i).child(0).information_state_tensor()

#     q_network = br_agent._q_network

#     info_state = q_network.prep_batch([q_network.reshape_infostate(it)])
#     print(info_state[:10])
#     q_values, rnn_outputs = q_network(info_state)
#     print(q_values)
#     rnn_output_list.append(rnn_outputs)
# #     q_values

In [19]:
import pyspiel

In [20]:
# Display the interpreter's module search path.
sys.path

['/apps/open_spiel/open_spiel/python/examples/notebooks',
 '/apps/open_spiel',
 '/apps/open_spiel/build/python',
 '/apps/ibm/ILOG/CPLEX_Studio129/cplex/python/3.6/x86-64_linux',
 '/apps/open_spiel/venv/lib/python36.zip',
 '/apps/open_spiel/venv/lib/python3.6',
 '/apps/open_spiel/venv/lib/python3.6/lib-dynload',
 '/usr/lib/python3.6',
 '',
 '/apps/open_spiel/venv/lib/python3.6/site-packages',
 '/apps/open_spiel/venv/lib/python3.6/site-packages/IPython/extensions',
 '/home/ubuntu/.ipython']