In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
import glob
import pandas as pd
import os
import seaborn as sns

from tqdm import tqdm
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import pickle
import re
import json
from pathlib import Path
import scipy.stats
import time

from open_spiel.python.algorithms.exploitability import nash_conv, best_response
from open_spiel.python.examples.ubc_plotting_utils import *
from open_spiel.python.examples.ubc_sample_game_tree import sample_game_tree, flatten_trees, flatten_tree
from open_spiel.python.examples.ubc_clusters import projectPCA, fitGMM
from open_spiel.python.examples.ubc_utils import *
import open_spiel.python.examples.ubc_dispatch as dispatch
from open_spiel.python.visualizations import ubc_treeviz


os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
from auctions.webutils import *


from open_spiel.python.examples.ubc_cma import *

output_notebook()
from open_spiel.python.games.clock_auction_base import InformationPolicy, ActivityPolicy, UndersellPolicy, TiebreakingPolicy
from open_spiel.python.algorithms.exploitability import nash_conv, best_response
from open_spiel.python.examples.ubc_decorators import TakeSingleActionDecorator, TremblingAgentDecorator, ModalAgentDecorator

# Utilities

In [3]:
def draw_game_tree(game, policy, br_policies=None, fname='treeviz.png', figure_dir='/global/scratch/open_spiel/open_spiel/open_spiel/python/examples/notebooks/greg/figures/graphviz'):
    """Render the game tree of `game` decorated with `policy` and save it to disk.

    Args:
        game: game object handed to `ubc_treeviz.GameTree`.
        policy: policy used to build the node/edge decorators and passed to the tree.
        br_policies: optional value forwarded unchanged to the decorators and the tree
            (presumably best-response policies -- confirm against ubc_treeviz).
        fname: file name of the figure, joined onto `figure_dir`.
        figure_dir: directory the figure is written to; created if it does not exist.

    Returns:
        None. The path of the written file is printed.
    """
    node_policy_decorator, edge_policy_decorator = ubc_treeviz.make_policy_decorators(policy, br_policies)
    gametree = ubc_treeviz.GameTree(
        game,
        node_decorator=node_policy_decorator,
        edge_decorator=edge_policy_decorator,
        group_infosets=False,
        group_terminal=False,
        group_pubsets=False,
        target_pubset='*',
        depth_limit=20,
        state_prob_limit=0.001,
        # action_prob_limit=0.001,  (left disabled, as in the original call)
        policy=policy,
        br_policies=br_policies,
        transpose=True,
    )

    outfile = os.path.join(figure_dir, fname)
    # Make sure the target directory exists so the draw call does not fail on a
    # machine where the figures folder has not been created yet.
    out_dir = os.path.dirname(outfile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    gametree.draw(outfile, prog='dot')
    print("Game tree saved to file", outfile)
    


In [4]:
from open_spiel.python.algorithms.get_all_states import get_all_states


In [5]:
def analyze_run(experiment_name, run_name):
    """Load a solver run, print its final checkpoint, and draw its game tree.

    The run is looked up by `run_name` within the experiment `experiment_name`.
    The tree is saved as '<experiment_name>_<run_name>.pdf' via `draw_game_tree`.

    Returns:
        The tuple (run, game, final_checkpoint, policy).
    """
    solver_run = EquilibriumSolverRun.objects.get(
        name=run_name,
        experiment__name=experiment_name,
    )
    spiel_game, checkpoint, learned_policy = get_results(solver_run)
    print(f'Loaded data for run {run_name} in experiment {experiment_name}')

    # Run summary: only the checkpoint is shown for now; the rest is still a placeholder.
    print(checkpoint)
    print('TODO')

    tree_fname = f'{experiment_name}_{run_name}.pdf'
    draw_game_tree(spiel_game, learned_policy, fname=tree_fname)

    return solver_run, spiel_game, checkpoint, learned_policy


# feb7_v1
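Debugging PPO crashes: check whether the trained PPO network ever samples action 4, which is illegal after the history [2, 2, 3, 3, 3, 3].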

In [6]:
# Fetch the run from the bug-reproduction experiment and load its game, final
# checkpoint and policy.
run = EquilibriumSolverRun.objects.get(
    experiment__name='feb7_v1_repro_bug',
    name='feb4_4t_feb4_4t_1_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-101-evalsomuch',
)
game, final_checkpoint, policy = get_results(run)


In [7]:
num_actions = game.num_distinct_actions()

# Walk from the root to the state under investigation by replaying the action history.
s = game.new_initial_state()
for a in (2, 2, 3, 3, 3, 3):
    s = s.child(a)

In [21]:
# Tile the reshaped information-state tensor 10 times along the first axis and ask
# player 0's network for actions; element [0] of the returned value is kept as `actions`.
batched_obs = (
    torch.tensor(s.information_state_tensor())
    .reshape(1, -1, num_actions)
    .repeat(10, 1, 1)
)
legal_mask = torch.tensor(s.legal_actions_mask())
actions = policy._agents[0].network.get_action_and_value(
    x=batched_obs,
    legal_actions_mask=legal_mask,
)[0]
# True if any of the returned actions equals 4.
(actions == 4).any()

tensor([[0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057],
        [0.0050, 0.0053, 0.0056, 0.0078, 0.0060, 0.0049, 0.0053, 0.0055, 0.0056,
         0.0057]], grad_fn=<

tensor(False)

tensor(False)

In [14]:
# Switch to the corresponding run in the original feb7_v1 experiment and load only its game.
run = EquilibriumSolverRun.objects.get(
    experiment__name='feb7_v1',
    name='feb4_4t_feb4_4t_1_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-101',
)
game = run.game.load_as_spiel()

In [19]:
# Enumerate the states of the currently loaded game with open_spiel's get_all_states.
states = get_all_states(game)

In [20]:
# Report how many states the enumeration returned.
print(len(states))

30412


In [22]:
# Rebuild the same state (history 2, 2, 3, 3, 3, 3) on the freshly loaded game.
s = game.new_initial_state()
for a in (2, 2, 3, 3, 3, 3):
    s = s.child(a)

In [24]:
# Action 4 is not among the legal actions at this state, so applying it is expected to
# raise. The error is the point of this cell: catch and show it so the notebook can
# still run top to bottom instead of stopping here.
try:
    illegal_child = s.child(4)
except ValueError as err:
    print(f'ValueError: {err}')
else:
    print(illegal_child)

ValueError: Bidder 0 is not active enough (21) to bid on [0 4] with cost of 28. 
History: [2, 2, 3, 3, 3, 3]. 
Infostate string: p0t2 r3 posted[[12.0, 7.0], [12.0, 7.35], [12.0, 7.72]] sub[[0, 0], [0, 3], [0, 3]] a21 agg[[0, 0], [0, 6], [0, 6]] proc[[0, 0], [0, 3], [0, 3]]. 
Action: 4. 
Legal actions: [0, 1, 2, 3, 5, 6]. 

# feb5_v2

In [64]:
# Load the run whose name contains "ppo" from experiment feb5_v2, print its final
# checkpoint and save its game tree (see analyze_run).
run, game, final_checkpoint, policy = analyze_run('feb5_v2', 'feb4_4t_feb4_4t_1_base_dev1000_rho1_t4-ppo_jun8_23ppo_76-108')

Loaded data for run feb4_4t_feb4_4t_1_base_dev1000_rho1_t4-ppo_jun8_23ppo_76-108 in experiment feb5_v2
feb4_4t_feb4_4t_1_base_dev1000_rho1_t4-ppo_jun8_23ppo_76-108 (feb5_v2) Iteration 159744
TODO
Game tree saved to file /global/scratch/open_spiel/open_spiel/open_spiel/python/examples/notebooks/greg/figures/graphviz/feb5_v2_feb4_4t_feb4_4t_1_base_dev1000_rho1_t4-ppo_jun8_23ppo_76-108.pdf


In [65]:
# Descend along actions 0, 0, 3, 3 from the root (add two more 3s to go further down),
# then show the policy's action probabilities for player 0 at that state.
state = game.new_initial_state()
for a in (0, 0, 3, 3):
    state = state.child(a)
policy.action_probabilities(state, 0)

{3: 1.0}

In [66]:
# Same analysis for the run whose name contains "cfr_port_10_extexternal_plus_linear"
# in experiment feb5_v2; this overwrites run, game, final_checkpoint and policy.
run, game, final_checkpoint, policy = analyze_run('feb5_v2', 'feb4_4t_feb4_4t_0_base_dev1000_rho1_t4-cfr_port_10_extexternal_plus_linear-109')

Loaded data for run feb4_4t_feb4_4t_0_base_dev1000_rho1_t4-cfr_port_10_extexternal_plus_linear-109 in experiment feb5_v2
feb4_4t_feb4_4t_0_base_dev1000_rho1_t4-cfr_port_10_extexternal_plus_linear-109 (feb5_v2) Iteration 109001
TODO
Game tree saved to file /global/scratch/open_spiel/open_spiel/open_spiel/python/examples/notebooks/greg/figures/graphviz/feb5_v2_feb4_4t_feb4_4t_0_base_dev1000_rho1_t4-cfr_port_10_extexternal_plus_linear-109.pdf


In [49]:
# Rebuild the state reached by actions 0, 0, 3, 3 (add two more 3s to go further down)
# and show the current policy's action probabilities for player 0 there.
state = game.new_initial_state()
for a in (0, 0, 3, 3):
    state = state.child(a)
policy.action_probabilities(state, 0)

{3: 1.0}

In [58]:
# Rebuild the policy directly from the stored checkpoint and query the same state again.
# NOTE(review): this cell's execution count (58) predates the analyze_run cells above
# (64 and 66), so the notebook order does not show which run `final_checkpoint` belonged
# to when this ran -- confirm it was a PPO checkpoint before trusting the output.
env_and_policy = ppo_db_checkpoint_loader(final_checkpoint)
policy = env_and_policy.make_policy()
policy.action_probabilities(state, 0)

{3: 1.0}