In [39]:
%load_ext autoreload
%autoreload 2
%pylab inline

import sys
import glob
import pandas as pd
import os
import seaborn as sns
# from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm
import pickle
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import logging
from open_spiel.python.examples.ubc_mccfr_cpp_example import action_to_bids
from open_spiel.python.examples.ubc_nfsp_example import policy_from_checkpoint
from open_spiel.python.examples.ubc_utils import *

from open_spiel.python.pytorch.ubc_nfsp import NFSP
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool, ColumnDataSource, ColorBar, LogColorMapper, LinearColorMapper
from bokeh.transform import linear_cmap, log_cmap

# Configure Bokeh so that plots are rendered inline in the notebook's output cells.
output_notebook()
# logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.DEBUG, datefmt='%I:%M:%S')
# logger = logging.getLogger()
# logger.setLevel(logging.INFO)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib


In [40]:
# Directory of the experiment to inspect; handed to policy_from_checkpoint when loading.
# NOTE(review): absolute, machine-specific path — presumably a shared cluster mount; confirm
# it exists before running on another machine.
EXPERIMENT_DIR = '/shared/outputs/big_game_2/lstm_deep'

In [41]:
# Name of the checkpoint to load from EXPERIMENT_DIR.
CHECKPOINT_NAME = 'checkpoint_latest'
# Loaded checkpoint object; later cells read its .agents, .game and .env attributes.
checkpoint = policy_from_checkpoint(EXPERIMENT_DIR, CHECKPOINT_NAME)

In [42]:
def check_on_q_values(checkpoint):
    """Snapshot the Q-values of agent 0's RL network at one fixed probe state.

    The probe state is obtained from the game's initial state by following
    ``child(0)`` twice. The network is evaluated on a batch containing only
    that state's information-state tensor.

    Args:
        checkpoint: loaded checkpoint object exposing ``agents`` and ``game``.

    Returns:
        dict pairing each entry of ``get_actions(checkpoint.game).values()``
        with the Q-value of the probe state's legal actions, matched by
        position.
    """
    net = checkpoint.agents[0]._rl_agent._q_network

    probe_state = checkpoint.game.new_initial_state().child(0).child(0)
    legal = probe_state.legal_actions()
    infostate_tensor = probe_state.information_state_tensor()

    # Build a batch of one and take the single output row, detached from autograd.
    batch = net.prep_batch([net.reshape_infostate(infostate_tensor)])
    all_q = net(batch).detach()[0]
    q_for_legal = all_q[legal]

    # NOTE(review): the pairing below is purely positional — it assumes the values of
    # get_actions(...) are ordered like this state's legal actions; confirm against
    # get_actions, otherwise labels and Q-values can be misaligned.
    labels = get_actions(checkpoint.game).values()
    return dict(zip(labels, q_for_legal))

In [38]:
# Continue self-play from the loaded checkpoint while periodically reporting
# (a) two watched Q-values at the probe state and (b) the running distribution of
# player 0's opening action.
NUM_EPISODES = 1000000  # total number of episodes to play
REPORT_EVERY = 50000    # print diagnostics every this many episodes
FIXED_PLAYER = 1        # this player acts with is_evaluation=True (treated as fixed)
WATCHED_BIDS = (
    "Bid for 1,0,0 licenses @ $75 with activity 75",
    "Bid for 1,0,1 licenses @ $100 with activity 100",
)

actions = []  # player 0's first action of every episode played so far
for episode in tqdm(range(NUM_EPISODES)):
    if episode % REPORT_EVERY == 0:
        qs = check_on_q_values(checkpoint)
        for bid in WATCHED_BIDS:
            print(bid, "-", qs[bid])
        # Explicit dtype: on the first report `actions` is empty, and a dtype-less
        # empty Series makes pandas emit a DeprecationWarning.
        print(pd.Series(actions, dtype="int64").value_counts())

    time_step = checkpoint.env.reset()
    first_action = True

    while not time_step.last():
        player_id = time_step.observations["current_player"]
        agent = checkpoint.agents[player_id]
        agent_output = agent.step(time_step, is_evaluation=(player_id == FIXED_PLAYER))

        # Record only the opening move of player 0 in this episode.
        if player_id == 0 and first_action:
            actions.append(agent_output.action)
            first_action = False

        time_step = checkpoint.env.step([agent_output.action])

    # Episode is over, step all agents with final info state.
    # NOTE(review): this terminal step passes no is_evaluation flag, so FIXED_PLAYER is
    # stepped with the default here — confirm that this does not let the "fixed" player
    # learn or record transitions.
    for agent in checkpoint.agents:
        agent.step(time_step)
    

  import sys
  0%|          | 17/1000000 [00:00<1:42:08, 163.16it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9937)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.0661)
Series([], dtype: int64)


  5%|▌         | 50018/1000000 [06:01<2:07:53, 123.80it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0075)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.0917)
4     49544
0       382
1        36
3         8
6         6
10        5
9         4
8         4
5         4
2         3
11        2
7         2
dtype: int64


 10%|█         | 100015/1000000 [12:07<2:03:36, 121.35it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0003)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.1011)
4     99049
0       794
1        65
9        15
6        12
3        12
10       11
8        10
11        9
7         8
2         8
5         7
dtype: int64


 15%|█▌        | 150022/1000000 [18:05<1:46:39, 132.83it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0131)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.1214)
4     148512
0       1238
1        100
9         21
10        20
3         20
7         18
8         17
2         15
6         14
11        13
5         12
dtype: int64


 20%|██        | 200004/1000000 [23:58<1:43:28, 128.86it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0102)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.1434)
4     197913
0       1744
1        138
3         29
10        26
9         26
7         25
8         24
2         21
11        19
6         18
5         17
dtype: int64


 25%|██▌       | 250002/1000000 [29:52<1:39:16, 125.91it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9981)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.1650)
4     247363
0       2200
1        187
9         35
10        34
3         31
7         28
11        26
8         24
6         24
5         24
2         24
dtype: int64


 30%|███       | 300015/1000000 [35:46<1:38:17, 118.70it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9993)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.1979)
4     296795
0       2691
1        221
9         43
3         36
10        35
7         34
11        31
5         30
8         28
6         28
2         28
dtype: int64


 35%|███▌      | 350001/1000000 [41:42<1:28:14, 122.77it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9915)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.2180)
4     346262
0       3147
1        259
9         49
3         43
10        40
8         37
7         35
5         34
11        32
2         32
6         30
dtype: int64


 40%|███▉      | 399993/1000000 [47:41<1:12:21, 138.21it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9993)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.2332)
4     395739
0       3587
1        302
9         51
10        49
3         45
8         41
5         41
7         39
11        38
2         37
6         31
dtype: int64


 45%|████▍     | 449999/1000000 [53:36<1:04:37, 141.86it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9881)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.2514)
4     445227
0       4025
1        340
10        53
9         52
7         48
3         48
8         46
5         44
11        43
2         38
6         36
dtype: int64


 50%|█████     | 500001/1000000 [59:34<1:11:54, 115.88it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9945)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.2702)
4     494735
0       4441
1        381
10        58
9         55
3         54
7         52
8         49
5         48
11        45
2         42
6         40
dtype: int64


 55%|█████▌    | 550001/1000000 [1:05:27<1:05:04, 115.24it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9964)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.2841)
4     544242
0       4863
1        419
10        63
9         60
3         55
7         54
5         53
8         51
11        48
2         47
6         45
dtype: int64


 60%|██████    | 600004/1000000 [1:11:25<1:03:20, 105.23it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0054)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.3113)
4     593739
0       5287
1        450
9         67
10        66
3         63
5         60
7         58
8         54
11        53
6         53
2         50
dtype: int64


 65%|██████▌   | 650012/1000000 [1:17:23<53:20, 109.37it/s]  

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9947)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.3417)
4     643205
0       5731
1        487
9         72
5         69
10        68
3         67
8         64
7         61
6         60
11        59
2         57
dtype: int64


 70%|███████   | 700000/1000000 [1:23:26<41:55, 119.27it/s] 

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9818)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.3591)
4     692727
0       6142
1        519
9         75
3         73
5         72
10        70
8         68
7         68
6         66
11        60
2         60
dtype: int64


 75%|███████▌  | 750001/1000000 [1:29:33<41:15, 100.99it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0093)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.3883)
4     742216
0       6576
1        551
9         83
5         77
10        76
7         76
3         73
8         71
6         71
11        65
2         65
dtype: int64


 80%|████████  | 800001/1000000 [1:35:33<34:31, 96.56it/s] 

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9727)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.3879)
4     791711
0       6997
1        586
9         89
3         83
7         82
5         81
10        79
8         76
6         74
11        71
2         71
dtype: int64


 85%|████████▍ | 849998/1000000 [1:41:33<17:15, 144.85it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0018)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.4248)
4     841195
0       7439
1        609
9         93
5         89
7         88
10        87
3         85
8         82
6         80
2         77
11        76
dtype: int64


 90%|████████▉ | 899985/1000000 [1:47:33<11:36, 143.55it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(44.9824)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.4474)


 90%|█████████ | 900013/1000000 [1:47:33<19:24, 85.84it/s] 

4     890680
0       7875
1        642
9         97
5         97
7         93
10        90
3         89
6         87
8         84
11        83
2         83
dtype: int64


 95%|█████████▍| 949989/1000000 [1:53:32<05:46, 144.31it/s]

Bid for 1,0,0 licenses @ $75 with activity 75 - tensor(45.0122)
Bid for 1,0,1 licenses @ $100 with activity 100 - tensor(15.4791)


 95%|█████████▌| 950026/1000000 [1:53:32<07:10, 116.04it/s]

4     940230
0       8262
1        669
5        102
9        100
7         98
10        93
3         93
8         90
6         89
2         89
11        85
dtype: int64


100%|██████████| 1000000/1000000 [1:59:36<00:00, 139.35it/s]


In [None]:
# Replay-buffer entries of agent 0's RL agent whose reward equals 50 or 30.
[
    transition
    for transition in checkpoint.agents[0]._rl_agent._replay_buffer._data
    if transition.reward in (50., 30.)
]

In [None]:
# Show the distinct rows of `q` as a DataFrame.
# NOTE(review): `q` is not assigned in any cell visible in this notebook — presumably left
# over from an earlier kernel session; define it above or remove this cell so that
# Restart & Run All works.
pd.DataFrame(q).drop_duplicates()

In [None]:
# Build a DataFrame from `records`.
# NOTE(review): `records` is not assigned in any cell visible in this notebook — confirm
# where it is produced; on a fresh kernel this cell would raise NameError.
df = pd.DataFrame.from_records(records)

In [None]:
# Keep a single row per distinct 'infostate_string' value; `df` is rebound to the
# de-duplicated frame, so the pre-deduplication rows are no longer reachable via `df`.
df = df.drop_duplicates(subset=['infostate_string'])
# Report how many rows (one per unique infostate string) are left.
print(f'After dropping, {len(df)} unique infostates remaining')