In [1]:
# Reload imported modules before each cell executes, so edits to project code are picked up
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output
%matplotlib inline

In [38]:
import sys
import glob
import pandas as pd
import os
import seaborn as sns

from tqdm import tqdm
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import pickle
import re
import json
from pathlib import Path
import scipy.stats


from open_spiel.python.algorithms.exploitability import nash_conv, best_response
from open_spiel.python.examples.ubc_plotting_utils import *
from open_spiel.python.examples.ubc_sample_game_tree import sample_game_tree, flatten_trees, flatten_tree
from open_spiel.python.examples.ubc_clusters import projectPCA, fitGMM
from open_spiel.python.examples.ubc_utils import *

from auctions.webutils import *

os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"


from open_spiel.python.examples.ubc_cma import analyze_checkpoint

output_notebook()
from open_spiel.python.games.clock_auction_base import InformationPolicy

In [58]:
def compute_per_type_combo(state_fn, policy, game, **kwargs):
    """Compute some function of the post-type-draw state for each type combo.

    The game is walked from its initial state through one chance node per
    player. The number of types per player is read along the all-zeros path,
    and chance action ``i`` is used to select type ``i``, so this assumes the
    chance actions are ``0..n-1`` and that the number of types for one player
    does not depend on the types drawn for earlier players.

    Args:
    - state_fn: (policy, state, **kwargs) -> value
    - policy: CFR policy (passed through to state_fn unchanged)
    - game: auction game; must provide new_initial_state() and num_players()
    - kwargs: passed to state_fn

    Returns:
    - results: dict of {type combination (tuple of ints): value}
    """
    # Imported locally so this function does not rely on a wildcard import
    # elsewhere in the notebook happening to export ``itertools``.
    import itertools

    # Count number of types for each player
    state = game.new_initial_state()
    num_types = []
    for _ in range(game.num_players()):
        num_types.append(len(state.chance_outcomes()))
        state = state.child(0)

    # Test each type combo independently
    results = {}
    for combo in itertools.product(*(range(n) for n in num_types)):
        # Replay the type draws from a fresh initial state
        state = game.new_initial_state()
        for type_index in combo:  # named to avoid shadowing the builtin ``type``
            state = state.child(type_index)

        results[combo] = state_fn(policy, state, **kwargs)
    return results

def min_prob(policy, state):
    """Collect the probability of the modal action at each decision node.

    Starting from ``state``, the policy's most likely action is followed at
    every player node and its probability is recorded. Chance nodes with a
    single outcome are stepped through via action 0; chance nodes with
    several outcomes produce a list holding one sub-result per outcome.

    Args:
    - policy: object exposing action_probabilities(state) -> {action: prob}
    - state: state to start from

    Returns:
    - list of modal-action probabilities (nested below multi-outcome chance
      nodes); empty for a terminal state
    """
    if state.is_terminal():
        return []

    if not state.is_chance_node():
        # Player node: follow the most likely action and record its probability
        probs = policy.action_probabilities(state)
        best_action = max(probs, key=probs.__getitem__)
        remainder = min_prob(policy, state.child(best_action))
        return [probs[best_action]] + remainder

    outcomes = state.chance_outcomes()
    if len(outcomes) == 1:
        # Single-outcome chance node: step through it
        return min_prob(policy, state.child(0))

    # Multi-outcome chance node: one sub-list per outcome
    return [min_prob(policy, state.child(action)) for action, _ in outcomes]

def get_demand_history(state, history_type='processed'):
    """Return the per-round demand history of a terminal state as nested tuples.

    Args:
    - state: terminal state exposing ``bidders``, each with ``processed_demand``
      and ``submitted_demand`` sequences
    - history_type: 'processed' reads ``processed_demand``; any other value
      reads ``submitted_demand``

    Returns:
    - tuple indexed as (round, bidder, product); hashable, so usable as a dict key

    Raises:
    - ValueError: if ``state`` is not terminal
    """
    if not state.is_terminal():
        raise ValueError("State must be terminal")

    attribute = 'processed_demand' if history_type == 'processed' else 'submitted_demand'

    # (bidder, round, product); each bidder's first entry is dropped
    by_bidder = np.array([getattr(bidder, attribute)[1:] for bidder in state.bidders])
    # Reorder to (round, bidder, product)
    by_round = np.transpose(by_bidder, (1, 0, 2))

    # Convert to nested tuples so the history can serve as a dict key
    return tuple(
        tuple(tuple(demand) for demand in round_demands)
        for round_demands in by_round
    )

def history_distribution(policy, state, min_prob=0.01, history_type='processed'):
    """Return the distribution over processed/submitted demand histories reachable from ``state``.

    The tree below ``state`` is expanded recursively. Chance nodes contribute
    their chance probabilities and player nodes contribute the policy's action
    probabilities. Branches whose own probability is below ``min_prob`` are
    not expanded, and the remaining mass is not renormalised, so the returned
    values may sum to less than 1.

    Args:
    - policy: object exposing action_probabilities(state) -> {action: prob}
    - state: state to expand from
    - min_prob: branches with probability below this are dropped
    - history_type: forwarded to get_demand_history

    Returns:
    - mapping of {demand history: probability}

    TODO: add an option to only include randomness from the policy, not from chance nodes?
    unsure if this would be interpretable (sum will be >1).
    """
    if state.is_terminal():
        # A terminal state corresponds to exactly one history
        return {get_demand_history(state, history_type): 1.0}

    if state.is_chance_node():
        branches = state.chance_outcomes()
    else:
        branches = policy.action_probabilities(state).items()

    totals = defaultdict(float)
    for action, branch_prob in branches:
        if branch_prob >= min_prob:
            below = history_distribution(
                policy,
                state.child(action),
                min_prob=min_prob,
                history_type=history_type,
            )
            # Weight the child's distribution by the probability of reaching it
            for history, conditional_prob in below.items():
                totals[history] += branch_prob * conditional_prob
    return totals
    
def get_history_entropy(history_distribution):
    """Return the entropy of a {history: probability} mapping.

    The probabilities are handed to ``scipy.stats.entropy`` with its default
    arguments; the keys are ignored.
    """
    probabilities = list(history_distribution.values())
    return scipy.stats.entropy(probabilities)



In [59]:
# Gather every solver run attached to the experiments of interest
experiments = ['jun2', 'jun2outcome']
runs = []
for experiment in experiments:
    runs.extend(Experiment.objects.get(name=experiment).equilibriumsolverrun_set.all())
print(f"Found {len(runs)} runs")

Found 90 runs


# Analyze runs

In [66]:
# Loaded games keyed by game name, shared across calls to get_results
game_cache = {}

def get_results(run, skip_single_chance_nodes=True):
    """Load the game, final checkpoint, and policy for a single run.

    Games are memoised in the module-level ``game_cache`` under
    ``run.game.name``, so each distinct game is loaded at most once. The
    cached game object is shared between runs, and its
    ``auction_params.skip_single_chance_nodes`` flag is overwritten on every
    call with the value passed here.

    Args:
    - run: solver run exposing ``game`` (with ``name`` and ``load_as_spiel()``)
      and ``equilibriumsolverruncheckpoint_set``
    - skip_single_chance_nodes: value written onto the game's auction params

    Returns:
    - (game, final_checkpoint, policy)
    """
    game_name = run.game.name
    # Check the cache before loading: ``dict.get(key, default)`` evaluates its
    # default eagerly, which would reload the game on every call.
    if game_name not in game_cache:
        game_cache[game_name] = run.game.load_as_spiel()
    game = game_cache[game_name]
    game.auction_params.skip_single_chance_nodes = skip_single_chance_nodes # for backwards compatibility

    final_checkpoint = run.equilibriumsolverruncheckpoint_set.last()
    # NOTE: unpickling can execute arbitrary code; only load checkpoints from a trusted database.
    policy = pickle.loads(final_checkpoint.policy)

    return game, final_checkpoint, policy

def get_algorithm_from_run(run):
    """Return a label for the algorithm a run used.

    The label is ``run.config['solver_type']`` (``'PPO'`` when absent). For
    the ``'cfr'`` solver, the configured ``sampling_method`` (empty string
    when absent) is appended after an underscore.
    """
    solver = run.config.get('solver_type', 'PPO')
    if solver != 'cfr':
        return solver
    return solver + '_' + run.config.get('sampling_method', '')

def display_history_distributions(history_dists):
    """Print each type combination followed by its histories and their probabilities.

    Args:
    - history_dists: dict of {type combination: {history: probability}}

    Each history is printed on its own line as ``<prob to 3 decimals> <history>``,
    with a blank line after every type combination.
    """
    for type_combo, distribution in history_dists.items():
        print(type_combo)
        for history, prob in distribution.items():
            print(f'{prob:.3f} {history}')
        print()

In [65]:
# Inspect a single run in detail: the first run of experiment 'jun2outcome'
# on game 'jun2/jun2_4_base.json' (use runs[0] instead to look at an arbitrary run)
run = (
    EquilibriumSolverRun.objects
    .filter(experiment__name='jun2outcome', game__name='jun2/jun2_4_base.json')
    .first()
)
game, cp, policy = get_results(run, skip_single_chance_nodes=False)
history_dists = compute_per_type_combo(
    history_distribution,
    policy,
    game,
    min_prob=0.01,
    history_type='processed',
)
display_history_distributions(history_dists)


(0, 0)
1.000 (((1, 1), (1, 1)),)

(0, 1)
1.000 (((1, 1), (1, 1)),)

(1, 0)
1.000 (((1, 1), (1, 1)),)

(1, 1)
1.000 (((1, 1), (1, 1)),)



In [45]:
# Build one summary record per run: game/solver metadata, evaluation metrics
# from the final checkpoint, and statistics of the demand-history distribution.
records = []
for run in tqdm(runs):
    game, final_checkpoint, policy = get_results(run, skip_single_chance_nodes=False)

    history_dists = compute_per_type_combo(history_distribution, policy, game, min_prob=0.001, history_type='processed')
    # Entropy of the history distribution for each type combo; computed once and reused in the record
    history_entropies = [get_history_entropy(dist) for dist in history_dists.values()]

    record = {
        # clock auction params
        'game_name': run.game.name, 
        'potential': run.config.get('potential_function', 'None'),
        'seed': run.config.get('seed'), 
        'run_name': run.name,

        # CMA knobs
        'information_policy': InformationPolicy(game.auction_params.information_policy).name,
        'clock_speed': game.auction_params.increment,
        # e.g. 'jun2/jun2_1_base.json' -> 'jun2_1'; fragile: assumes a '<dir>/<a>_<b>_...' game name
        'base_game_name': '_'.join(run.game.name.split('/')[1].split('_')[:2]),

        # solver information
        'alg': get_algorithm_from_run(run),
        'walltime': run.walltime(),

        # metrics from eval
        **analyze_checkpoint(final_checkpoint),

        # stats about history distribution (averaged over type combos)
        'avg_distinct_histories': np.mean([len(d) for d in history_dists.values()]),
        'avg_history_entropy': np.mean(history_entropies),
    } 

    records.append(record)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:07<00:00, 11.58it/s]


In [46]:
# One row per run; columns are the keys of the per-run record dicts
df = pd.DataFrame.from_records(records)

In [50]:
# Display the per-run summary table
df

Unnamed: 0,game_name,potential,seed,run_name,information_policy,clock_speed,base_game_name,alg,walltime,p0_utility,p0_payment,p1_utility,p1_payment,total_welfare,total_revenue,auction_lengths,common_allocations,avg_distinct_histories,avg_history_entropy
0,jun2/jun2_1_base.json,,100,jun2_jun2_1_base-cfr_externalmccfr_external-100,SHOW_DEMAND,0.3,jun2_1,cfr_external,10117.170336,8.127864,32.000000,21.031140,59.375664,120.534667,91.375664,2.324862,"[((1, 0), 9995)]",2.5,0.861293
1,jun2/jun2_1_base.json,,101,jun2_jun2_1_base-cfr_externalmccfr_external-101,SHOW_DEMAND,0.3,jun2_1,cfr_external,9355.160500,7.941783,32.000000,21.423640,58.964776,120.330199,90.964776,2.225768,"[((1, 0), 9997)]",2.0,0.407584
2,jun2/jun2_1_base.json,,102,jun2_jun2_1_base-cfr_externalmccfr_external-102,SHOW_DEMAND,0.3,jun2_1,cfr_external,9745.340371,7.984394,32.000000,22.135791,58.456245,120.576431,90.456245,2.155862,"[((1, 0), 9996)]",2.0,0.335654
3,jun2/jun2_4_hide_demand.json,,100,jun2_jun2_4_hide_demand-cfr_externalmccfr_exte...,HIDE_DEMAND,0.3,jun2_4,cfr_external,4811.467835,13.475280,51.000000,2.498499,51.000000,117.973779,102.000000,1.000000,"[((1, 1), 9992)]",1.0,0.000000
4,jun2/jun2_3_hide_demand.json,,102,jun2_jun2_3_hide_demand-cfr_externalmccfr_exte...,HIDE_DEMAND,0.3,jun2_3,cfr_external,8811.695541,20.058906,35.129613,21.940126,41.735842,118.864486,76.865455,1.664566,"[((1, 1), 5040), ((0, 1), 4959)]",1.5,0.315434
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,jun2/jun2_4_high_speed.json,,100,jun2_jun2_4_high_speed-cfr_outcomemccfr_outcom...,SHOW_DEMAND,0.6,jun2_4,cfr_outcome,3226.163260,13.475280,51.000000,2.497898,50.992994,117.966173,101.992994,1.000000,"[((1, 1), 9992)]",1.0,0.000000
86,jun2/jun2_1_high_speed.json,,100,jun2_jun2_1_high_speed-cfr_outcomemccfr_outcom...,SHOW_DEMAND,0.6,jun2_1,cfr_outcome,3930.065074,7.925600,32.000000,21.315400,59.132000,120.373000,91.132000,1.713500,"[((1, 0), 10000)]",2.0,0.593842
87,jun2/jun2_1_high_speed.json,,101,jun2_jun2_1_high_speed-cfr_outcomemccfr_outcom...,SHOW_DEMAND,0.6,jun2_1,cfr_outcome,3830.186361,7.992799,32.000000,22.714783,57.686297,120.393879,89.686297,1.586517,"[((1, 0), 9998)]",2.0,0.669414
88,jun2/jun2_0_base.json,,100,jun2_jun2_0_base-cfr_outcomemccfr_outcome-100,SHOW_DEMAND,0.3,jun2_0,cfr_outcome,3963.325699,24.643923,50.507031,14.808497,38.242287,128.201739,88.749319,1.859342,"[((1, 1), 7475), ((1, 0), 2528)]",2.5,0.200505


In [49]:
# Sort the runs by game / clock speed / information policy and write them to CSV
# with those columns moved into the index.
groupers = ['base_game_name', 'clock_speed', 'information_policy']
# NOTE(review): ``df.index`` is passed to set_index as an array, which pandas
# attaches by position to the *sorted* rows. The leading index level therefore
# follows the unsorted frame's index order rather than each row's pre-sort
# label -- confirm this is intended.
(
    df
    .sort_values(groupers)
    .set_index([df.index] + groupers, drop=True)
    .to_csv('jun5_results.csv')
)