In [1]:
%load_ext autoreload
%autoreload 2
%pylab inline

import sys
import glob
import pandas as pd
import os
import seaborn as sns
# from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm
import pickle
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import logging
from open_spiel.python.examples.ubc_mccfr_cpp_example import action_to_bids
from open_spiel.python.examples.ubc_nfsp_example import policy_from_checkpoint
from open_spiel.python.pytorch.ubc_nfsp import NFSP
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool, ColumnDataSource, ColorBar, LogColorMapper, LinearColorMapper
from bokeh.transform import linear_cmap, log_cmap

# Route Bokeh output to the notebook (see the `bokeh.io` import above).
output_notebook()
# basicConfig installs the handler/format at DEBUG; the root logger is then
# narrowed to INFO two lines below, so INFO is the effective level.
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.DEBUG, datefmt='%I:%M:%S')
logger = logging.getLogger()
logger.setLevel(logging.INFO)

Populating the interactive namespace from numpy and matplotlib


In [288]:
# Experiment directory passed to `policy_from_checkpoint`; the analysis output
# folder is created underneath it later in the notebook.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
EXPERIMENT_DIR = '/shared/outputs/big_game_2/lstm_deep'

In [289]:
# Name of the checkpoint to load; also reused as the stem of the pickle file
# written to the analysis folder.
CHECKPOINT_NAME = 'checkpoint_latest'
# The returned object is used below through its `.agents`, `.env` and `.game` attributes.
checkpoint = policy_from_checkpoint(EXPERIMENT_DIR, CHECKPOINT_NAME)

11:45:41 INFO:Game loaded
11:45:41 INFO:Using game instance: turn_based_simultaneous_game
11:45:41 INFO:Game has a state size of 6010, 12 distinct actions, and 2 players
11:45:41 INFO:Game has 3 products


In [290]:

import torch.nn.functional as F
def noisy_act(temperature):
    """Build a replacement ``_act`` method that samples from a rescaled softmax.

    The average network's outputs are multiplied by ``temperature`` before a
    softmax is taken over the legal actions only, so ``temperature`` acts as a
    multiplier on the network outputs (values below 1 flatten the resulting
    distribution).

    Arguments:
    - temperature: factor applied to the network outputs before the softmax

    Returns:
    - a function ``_act(self, info_state, legal_actions)`` returning
      ``(action, probs)``, where ``probs`` has one entry per action and is
      zero for every action not in ``legal_actions``
    """
    def _act(self, info_state, legal_actions):
        batch = self._avg_network.prep_batch([info_state])
        scaled_values = self._avg_network(batch) * temperature
        first_row = scaled_values[0]
        self._last_action_values = first_row

        # Softmax is restricted to the legal actions; the rest keep probability 0.
        legal_probs = F.softmax(first_row[legal_actions], dim=0).detach().numpy()
        probs = np.zeros(self._num_actions)
        probs[legal_actions] = legal_probs
        # Renormalise so the vector is accepted by np.random.choice.
        probs /= sum(probs)
        action = np.random.choice(len(probs), p=probs)
        return action, probs

    return _act

In [291]:
# Replace agent 1's `_act` with the sampling version built by `noisy_act(0.1)`.
# `__get__` binds the plain function to this agent instance so it receives `self`.
checkpoint.agents[1]._act = noisy_act(0.1).__get__(checkpoint.agents[1], NFSP)

In [292]:
# Most recent captured layer input, keyed by the name given to `get_input`.
hidden_layers = {}


def get_input(name):
    """Create a forward hook that stores a layer's first input under ``name``.

    Each time the hook fires, any previous entry for ``name`` in
    ``hidden_layers`` is removed and replaced by the detached first input.
    """
    def hook(module, inputs, _output):
        # Remove-then-insert, so the refreshed key ends up last in the dict.
        hidden_layers.pop(name, None)
        hidden_layers[name] = inputs[0].detach()
    return hook

# Capture the input to each agent's output layer on every forward pass.
# All agents share the 'last_layer' key, so `hidden_layers` always holds the
# value from whichever network ran most recently.
for agent in checkpoint.agents:
    agent._avg_network.output_layer.register_forward_hook(get_input('last_layer'))

In [293]:
# One record per decision point visited while playing the loaded agents
# against each other.
records = []
num_products = 3 # TODO: read from game_config

for episode in tqdm(range(10000)):
    time_step = checkpoint.env.reset()

    while not time_step.last():
        observations = time_step.observations
        player_id = observations["current_player"]
        agent = checkpoint.agents[player_id]

        legal_actions = observations["legal_actions"][player_id]
        info_state_flat = observations["info_state"][player_id]
        info_state = agent._avg_network.reshape_infostate(info_state_flat)
        info_state_string = str(checkpoint.env._state.information_state_string())
        agent_action, agent_probs = agent._act(info_state, legal_actions)
        # One 'Prob_<i>' column per action index.
        prob_dict = {f'Prob_{i}': prob for i, prob in enumerate(agent_probs)}

        action_list = [agent_action]
        time_step = checkpoint.env.step(action_list)

        # Exposure defaults to 0 when the infostate has a single entry.
        sor_exposure = 0
        if len(info_state) > 1:
            # Second-to-last entry of the infostate; the slices below assume
            # prices occupy the final `num_products` slots and processed
            # demand the block two before that — TODO confirm layout.
            last_round_infostate_tensor = info_state[-2]
            current_prices = last_round_infostate_tensor[-num_products:]
            processed_demand = last_round_infostate_tensor[-3 * num_products: -2 * num_products]
            sor_exposure = float(current_prices @ processed_demand)

        record = {
            'infostate_string': info_state_string,
            'infostate_tensor': info_state,
            # Filled in by the forward hook during the `_act` call above.
            'agent_embedding': hidden_layers['last_layer'],
            'agent_id': player_id,
            'sor_exposure': sor_exposure,
            'round': len(info_state),
            **prob_dict,
        }
        records.append(record)

100%|██████████| 10000/10000 [00:48<00:00, 204.77it/s]


In [294]:
# One row per recorded decision point; columns are the keys of each record dict.
df = pd.DataFrame.from_records(records)

In [295]:
# Keep a single row (the first one recorded) for each distinct infostate string.
df = df.drop_duplicates(subset=['infostate_string'], keep='first')
print(f'After dropping, {df.shape[0]} unique infostates remaining')

After dropping, 152 unique infostates remaining


In [296]:
# Persist the de-duplicated frame next to the experiment outputs.
OUTPUT_DIR = EXPERIMENT_DIR + '/analysis'
# Pure-Python equivalent of `mkdir -p`: no shell involved, so paths containing
# spaces or shell metacharacters are handled correctly.
os.makedirs(OUTPUT_DIR, exist_ok=True)
with open(f'{OUTPUT_DIR}/{CHECKPOINT_NAME}.pkl', 'wb') as f:
    pickle.dump(df, f)


In [297]:
# with open(f'{OUTPUT_DIR}/{CHECKPOINT_NAME}.pkl', 'rb') as f:
#     df = pickle.load(f)


In [298]:
# Restrict to player 0 rows whose infostate string contains "Values:120".
sub_df = df.query('agent_id == 0 and infostate_string.str.contains("Values:120")', engine='python').copy() # Silly way to filter for type... need better

# Stack the per-row embedding tensors into one array and drop singleton axes.
# NOTE(review): with exactly one matching row, squeeze() would also drop the
# row axis — confirm this cannot happen for the filters used here.
embeddings = np.stack(sub_df.agent_embedding.map(lambda t: t.numpy()).values).squeeze()
# Object array of per-row infostate arrays (not stacked).
infostates = sub_df.infostate_tensor.map(lambda t: t.numpy()).values
infostate_strings = sub_df.infostate_string.values
# probs = np.stack(sub_df.agent_probs.values).squeeze()

print(f"Type/Player combo contains {len(sub_df)} infostates")

Type/Player combo contains 29 infostates


In [299]:
from sklearn.decomposition import PCA

def projectPCA(X, components=10):
    """
    Project X onto its leading principal components.

    Arguments:
    - X: (n, d) array of data
    - components: how many principal components to keep

    Returns:
    - projected: (n, components) array, X expressed in the principal axes
    - explained_variance_ratio: fraction of variance captured by each kept axis
    """
    reducer = PCA(n_components=components)
    projected = reducer.fit_transform(X)
    return projected, reducer.explained_variance_ratio_

def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Return a new colormap built from the [minval, maxval] slice of ``cmap``.

    ``cmap`` is sampled at ``n`` evenly spaced points between ``minval`` and
    ``maxval``, and those colours are used to build a linear segmented
    colormap named ``trunc(<cmap name>,<minval>,<maxval>)``.
    """
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    return plt.matplotlib.colors.LinearSegmentedColormap.from_list(label, sampled_colors)

# 'Blues' restricted to its [0.2, 1.0] range, i.e. without the lowest fifth of the map.
blues_trunc = truncate_colormap(plt.get_cmap('Blues'), 0.2, 1.0)

In [300]:
# NOTE: `pca` is the projected data array returned by projectPCA (one row per
# row of `embeddings`), not a fitted PCA object; `variance` holds the
# explained-variance ratios.
pca, variance = projectPCA(embeddings)

In [301]:
# Attach the first two principal-component coordinates as columns for plotting.
sub_df = sub_df.assign(pca_0=pca[:, 0], pca_1=pca[:, 1])

In [302]:
def get_actions(game):
    """Print the string form of each action legal at the first player node of ``game``.

    Starting from ``game.new_initial_state()``, chance nodes are skipped by
    always taking child 0, and every legal action of the resulting state is
    printed via ``state.action_to_string``.

    Arguments:
    - game: object exposing ``new_initial_state()``

    Returns:
    - None (output is printed)
    """
    # Use the game that was passed in rather than the notebook-global checkpoint.
    state = game.new_initial_state()
    # Skip over chance nodes
    while state.current_player() < 0:
        state = state.child(0) # Let chance choose first outcome. We're assuming all moves are possible at starting prices for all players, that may not really be true though

    # Now we are at a player state. Iterate over the legal action ids
    # themselves; they are not guaranteed to be the contiguous range 0..n-1.
    for action in state.legal_actions():
        print(state.action_to_string(action))

# List the actions available at the first player decision of the loaded game.
get_actions(checkpoint.game)

Bid for 0,0,0 licenses @ $0 with activity 0
Bid for 0,0,1 licenses @ $25 with activity 25
Bid for 0,1,0 licenses @ $50 with activity 50
Bid for 0,1,1 licenses @ $75 with activity 75
Bid for 1,0,0 licenses @ $75 with activity 75
Bid for 1,0,1 licenses @ $100 with activity 100
Bid for 1,1,0 licenses @ $125 with activity 125
Bid for 1,1,1 licenses @ $150 with activity 150
Bid for 2,0,0 licenses @ $150 with activity 150
Bid for 2,0,1 licenses @ $175 with activity 175
Bid for 2,1,0 licenses @ $200 with activity 200
Bid for 2,1,1 licenses @ $225 with activity 225


In [306]:
from bokeh.palettes import Magma256

In [313]:
# Action index -> description, iterated below as (action_num, action_name).
# Previously this name was only defined in a commented-out cell, so the cell
# failed with NameError on a fresh kernel.
# NOTE(review): [2, 1, 1] is taken from that commented-out call and matches the
# largest bundle printed by get_actions — confirm against the game config.
action_dict = action_to_bids([2, 1, 1])

q = sub_df.copy()
# Need to change newlines into <br> to have them actually break in the tooltip.
# Only the text after the "Budget: 300<br>" marker is kept; a string without
# that marker raises IndexError here.
q['infostate_string'] = q['infostate_string'].apply(lambda x: x.replace('\n', '<br>').split("Budget: 300<br>")[1])
action_cols = [f'Prob_{i}' for i in range(checkpoint.game.num_distinct_actions())]
source = ColumnDataSource(q.drop(['infostate_tensor', 'agent_embedding'], axis='columns')) # Need to drop tensors b/c of serialization issues

# Loop-invariant pieces of the colour mapping: a shared upper bound keeps the
# colour scale comparable across the per-action plots.
max_prob = q[action_cols].values.max()
palette = list(reversed(Magma256))

for action_num, action_name in action_dict.items():

    plot = figure(width=900, height=400, title=f"Action {action_num}: {action_name}")

    # add a circle renderer with a size, color, and alpha
    mapper = linear_cmap(field_name=f'Prob_{action_num}', palette=palette, low=-.1, high=max_prob)
#     mapper = log_cmap(field_name=f'Prob_{action_num}', palette="Magma256" ,low=1e-9, high=q[action_cols].values.max())
    plot.circle('pca_0', 'pca_1', size=10, color=mapper, alpha=0.3, source=source)

    plot.add_tools(HoverTool(tooltips=[['Infostate', '@infostate_string'],
                                       ['Prob', f'@Prob_{action_num}' + '{(.00)}'],
                                       ['SoRExposure', '@sor_exposure'],
                                       ['Round', '@round'],
                                      ]))
    color_bar = ColorBar(color_mapper=mapper['transform'], label_standoff=12)
    plot.add_layout(color_bar, 'right')

    show(plot)


In [None]:
# Per-action probabilities for the rows of `sub_df`, one column per action.
# `probs` was previously undefined (its only definition was commented out and
# referred to a non-existent `agent_probs` column).
prob_cols = [f'Prob_{i}' for i in range(checkpoint.game.num_distinct_actions())]
probs = sub_df[prob_cols].to_numpy()

# Action whose probability colours the points. This used to rely on
# `action_num` leaking from the plotting loop in the previous cell; the last
# action index is presumably what that loop left behind — change as needed.
plot_action_num = probs.shape[1] - 1

# NOTE(review): 17 is the number of per-round features inspected — confirm
# against the infostate layout.
for feature_num in range(17):
    # Value of this feature in the final entry of each infostate.
    plt_y = [infostates[i][-1][feature_num] for i in range(len(infostates))]
    plt.scatter(pca[:, 0], plt_y, c=probs[:, plot_action_num], alpha=1, cmap=blues_trunc)
    plt.title(feature_num)
    plt.show()

In [None]:
# action_dict = action_to_bids([2,1,1])


# for action_num, action_name in action_dict.items():
#     plt.scatter(pca[:, 0], pca[:, 1], c=probs[:, action_num], alpha=1, cmap=blues_trunc)
#     plt.clim(0, 1) # Consistent across all actions
#     cbar = plt.colorbar()
#     cbar.set_label(f'Action Probability') 
#     plt.title(action_name)
#     plt.show()


