In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
import glob
import pandas as pd
import os
import seaborn as sns

from tqdm import tqdm
from statsmodels.distributions.empirical_distribution import ECDF
from collections import defaultdict
import pickle
import re
import json
from pathlib import Path
import scipy.stats
import time

os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
import django
django.setup()
from auctions.webutils import *

from open_spiel.python.algorithms.exploitability import nash_conv, best_response
from open_spiel.python.examples.ubc_plotting_utils import *
from open_spiel.python.examples.ubc_sample_game_tree import sample_game_tree, flatten_trees, flatten_tree
from open_spiel.python.examples.ubc_clusters import projectPCA, fitGMM
from open_spiel.python.examples.ubc_utils import *
import open_spiel.python.examples.ubc_dispatch as dispatch



from open_spiel.python.examples.ubc_cma import *

output_notebook()
from open_spiel.python.games.clock_auction_base import InformationPolicy, ActivityPolicy, UndersellPolicy, TiebreakingPolicy
from open_spiel.python.algorithms.exploitability import nash_conv, best_response
from open_spiel.python.examples.ubc_decorators import TakeSingleActionDecorator, TremblingAgentDecorator, ModalAgentDecorator

In [10]:
# Gather every equilibrium-solver run attached to the experiments named below.
experiments = ['feb3_test_short_v4']

runs = []
for experiment in experiments:
    experiment_record = Experiment.objects.get(name=experiment)
    runs.extend(experiment_record.equilibriumsolverrun_set.all())
print(f"Found {len(runs)} runs")

Found 100 runs


In [12]:
# Checkpoints attached to the first loaded run (used for ad-hoc inspection below).
cp = runs[0].equilibriumsolverruncheckpoint_set.all()

In [13]:
# Show the evaluations recorded for the first of those checkpoints.
cp[0].evaluation_set.all()

<QuerySet [<Evaluation: Evaluation p0=straightforward for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb3_test_short_v4) Iteration 8192>, <Evaluation: Evaluation p1=straightforward for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb3_test_short_v4) Iteration 8192>, <Evaluation: Evaluation  for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb3_test_short_v4) Iteration 8192>, <Evaluation: Evaluation p0=modal+p1=modal for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb3_test_short_v4) Iteration 8192>, <Evaluation: Evaluation p0=tremble+p1=tremble for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb3_test_short_v4) Iteration 8192>, <Evaluation: Evaluation p0=straightforward+p1=straightforward for jan12_repro_4t_jan12_repro_4t_3_base_dev1000_rho0_t4_tie_break-ppo_jun8_23ppo_76-104 (feb

In [85]:
# TODO: Belongs in script

from open_spiel.python.examples.straightforward_agent import StraightforwardAgent
from open_spiel.python.examples.ubc_decorators import TakeSingleActionDecorator

# TODO: Policy arg is global
def get_modal_nash_conv(game, policy, config):
    """Evaluate NashConv after wrapping every player's agent in ``ModalAgentDecorator``.

    Agents are built with ``make_env_and_policy(game, config)``; each agent's
    ``policy`` attribute is then overwritten with ``policy``, and the agent of
    every player index is replaced by a ``ModalAgentDecorator`` around it
    (presumably making the agent play its modal action -- confirm in
    ``ubc_decorators``).

    Returns:
        Whatever ``get_nash_conv`` returns for the resulting joint policy.
    """
    env_and_policy = make_env_and_policy(game, config)
    # Point every freshly built agent at the supplied policy object.
    for built_agent in env_and_policy.agents:
        built_agent.policy = policy
    # Swap each player's agent for its decorated counterpart.
    for player_id in range(game.num_players()):
        env_and_policy.agents[player_id] = ModalAgentDecorator(env_and_policy.agents[player_id])
    return get_nash_conv(game, env_and_policy.make_policy())

def get_nash_conv(game, policy, time_limit=300):
    """Compute NashConv for ``policy`` on ``game`` through ``time_bounded_run``.

    Args:
        game: Game object forwarded to ``nash_conv``.
        policy: Joint policy forwarded to ``nash_conv``.
        time_limit: First argument handed to ``time_bounded_run`` (presumably a
            limit in seconds -- confirm against that helper). Defaults to 300,
            the value that used to be hard-coded here.

    Returns:
        The result of ``nash_conv`` (called with ``return_only_nash_conv=True``
        and ``restrict_to_heuristics=False``) when ``time_bounded_run`` reports
        success, otherwise ``None``.
    """
    # The middle element (time taken) is not needed by any caller.
    worked, _time_taken, retval = time_bounded_run(
        time_limit, nash_conv, game, policy,
        return_only_nash_conv=True, restrict_to_heuristics=False,
    )
    return retval if worked else None

def get_straightforward_nash_conv(game):
    """Evaluate NashConv when every player is a decorated ``StraightforwardAgent``.

    Each player's agent is replaced by a ``StraightforwardAgent`` wrapped in
    ``TakeSingleActionDecorator`` (constructed with the game's number of
    distinct actions). The resulting joint policy is printed and then passed
    to ``get_nash_conv``.

    Returns:
        Whatever ``get_nash_conv`` returns for that policy.
    """
    env_and_policy = make_env_and_policy(game, {})
    for player_id in range(game.num_players()):
        base_agent = StraightforwardAgent(player_id, game)
        env_and_policy.agents[player_id] = TakeSingleActionDecorator(base_agent, game.num_distinct_actions())
    straightforward_policy = env_and_policy.make_policy()
    print(straightforward_policy)
    return get_nash_conv(game, straightforward_policy)
    
def get_stupid_nash_conv(game):
    """Evaluate NashConv when every player is a ``StupidAgent``.

    The environment is created with ``EnvParams(num_envs=1, sync=False)`` and
    an empty config; each player's agent is then replaced by a ``StupidAgent``.

    Returns:
        Whatever ``get_nash_conv`` returns for the resulting joint policy.
    """
    env_and_policy = make_env_and_policy(game, {}, EnvParams(num_envs=1, sync=False))
    for player_id in range(game.num_players()):
        env_and_policy.agents[player_id] = StupidAgent(player_id, game)
    stupid_policy = env_and_policy.make_policy()
    return get_nash_conv(game, stupid_policy)
    



# Analyze runs

In [86]:
import traceback

# Build one summary record per run from the modal evaluation of its final checkpoint.
records = []
for run in tqdm(runs):
    game = run.game.load_as_spiel()
    record = {
        'run_name': run.name,
        'game_name': run.game.name,
        'seed': run.config.get('seed'),
        'config': run.get_config_name(),
        'alg': get_algorithm_from_run(run),
    }
    record.update(get_game_info(game, run.game))

    # Append before evaluating so failed runs still show up with no_error=False.
    record['no_error'] = False
    records.append(record)

    try:
        game, final_checkpoint, policy = get_results(run, load_policy=True)
    except Exception as e:
        print(f"Skipping run {run.name} because of error {e}")
        continue

    try:
        record['t'] = final_checkpoint.t
        # Fix: a trailing comma here used to store a 1-tuple instead of the value.
        record['walltime'] = run.walltime()
        evaluation = final_checkpoint.get_modal_eval()

        # Conv values may be null; every derived quantity falls back to NaN in that case.
        has_nash_conv = not pd.isnull(evaluation.nash_conv)
        has_heuristic_conv = not pd.isnull(evaluation.heuristic_conv)

        record['nash_conv'] = evaluation.nash_conv
        record['rewards'] = evaluation.mean_rewards
        record['nash_conv_frac'] = evaluation.nash_conv / sum(evaluation.mean_rewards) if has_nash_conv else np.nan
        record['heuristic_conv'] = evaluation.heuristic_conv
        record['heuristic_conv_frac'] = evaluation.heuristic_conv / sum(evaluation.mean_rewards) if has_heuristic_conv else np.nan

        for i in range(game.num_players()):
            record[f'rewards_{i}'] = evaluation.mean_rewards[i]
            record[f'nc_player_improvements_{i}'] = evaluation.nash_conv_player_improvements[i] if has_nash_conv else np.nan
            record[f'nc_player_improvements_frac_{i}'] = (evaluation.nash_conv_player_improvements[i] / evaluation.mean_rewards[i]) if has_nash_conv else np.nan

            record[f'hc_player_improvements_{i}'] = evaluation.heuristic_conv_player_improvements[i] if has_heuristic_conv else np.nan
            record[f'hc_player_improvements_frac_{i}'] = (evaluation.heuristic_conv_player_improvements[i] / evaluation.mean_rewards[i]) if has_heuristic_conv else np.nan

        record.update(**analyze_samples(evaluation.samples, game))

        # Recompute NashConv for the modal policy directly (bounded by get_nash_conv's time limit).
        print(get_modal_nash_conv(game, policy, dict(final_checkpoint.equilibrium_solver_run.config)))
        # print(get_straightforward_nash_conv(game))
        # print(get_stupid_nash_conv(game))

        record['no_error'] = True
    except Exception as e:
        # Best-effort: report the failure with its traceback and move on to the next run.
        print(f"Something wrong with {run}. Skipping. {e}")
        print(traceback.format_exc())

print(len(records))

  0%|                                                                                                                                                                                                                                                 | 0/400 [00:00<?, ?it/s]

False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_k

  0%|                                                                                                                                                                                                                                                 | 0/400 [00:03<?, ?it/s]

False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_keys(['info_state', 'legal_actions', 'state', 'current_player'])
False
dict_k



KeyboardInterrupt



In [None]:
# Count runs per game variant, split by whether they were processed without error.
df = pd.DataFrame.from_records(records)
with pd.option_context('display.max_rows', None):
    display(
        df.loc[:, ['value_structure', 'rule', 'base_game_name', 'deviations', 'no_error']]
        .value_counts()
        .sort_index()
    )

In [None]:
# Histogram of HeuristicConv, followed by the number of runs where it is missing.
df['heuristic_conv'].plot.hist()
print(df['heuristic_conv'].isnull().sum())

# Which game variants have a missing HeuristicConv?
# Open question: games with 0 deviations show up as null here -- is that real?
df[df['heuristic_conv'].isnull()][['value_structure', 'rule', 'base_game_name', 'deviations', 'no_error']].value_counts().sort_index()

In [None]:
# Histogram of NashConv, followed by the number of runs where it is missing.
df['nash_conv'].plot.hist()
print(df['nash_conv'].isnull().sum())

# Which game variants have a missing NashConv?
# Open question: games with 0 deviations show up as null here -- is that real?
df[df['nash_conv'].isnull()][['value_structure', 'rule', 'base_game_name', 'deviations', 'no_error']].value_counts().sort_index()

In [None]:
# Histogram of auction lengths for each (deviations, value_structure) group, with a legend.
df.groupby(['deviations', 'value_structure'])['auction_lengths'].plot(kind='hist', legend=True)

In [None]:
# Colour names available for plots; the palette mapping itself is currently left empty.
palette = {}
colors = [
    'red', 'blue', 'magenta', 'green', 'orange', 'brown', 'black',
    'navy', 'pink', 'gold', 'darkgreen', 'orangered', 'olive',
]
# for i, v in enumerate(df['variant'].unique()):
#     palette[v] = colors[i]

In [None]:
# Before plotting we need to (a) remove "bad" entries and (b) be careful with
# comparisons that are missing data points. Start from a copy of the full frame.
df_plt = df.copy()

In [None]:
### Remove bad entries

# TODO: generalize to 3+ players

# Keep a run only if both players' fractional heuristic improvement is below this threshold.
good_thresh = 0.1
# good_thresh_abs = 5
# df_plt = df.query(f'player_improvements_0 < {good_thresh_abs} and player_improvements_1 < {good_thresh_abs}')
df_plt = df[
    (df['hc_player_improvements_frac_0'] < good_thresh)
    & (df['hc_player_improvements_frac_1'] < good_thresh)
].copy()
# df_plt = df.query(f'nash_conv_frac < {good_thresh}')
len(df), len(df_plt)

#### So many removed... wow


In [None]:


# 1) Get the max/min of each stat for every valuation/treatment pairing.

# First narrow down to the relevant data points, then group by rule change and
# SATS (= game_name) and take the max/min.
metrics = [
    'total_revenue', 'total_welfare', 'auction_lengths',
    'num_lotteries', 'p_lottery', 'exposure_frac',
]
for i in range(2):  # TODO: hard-coded to two players
    metrics.extend([f'p{i}_utility', f'p{i}_payment'])


# Same rows as df_plt, indexed so that the 'base' rule rows of a value structure can be looked up.
df_plt_indexed = df_plt.set_index(['value_structure', 'rule', 'deviations']).sort_index().copy()

def make_data_dict(df):
    """Summarise each entry of the global ``metrics`` list per ``base_game_name``.

    Returns:
        A DataFrame indexed by ``base_game_name`` with, for every metric in
        ``metrics`` (in order), a ``max_<metric>`` column followed by a
        ``min_<metric>`` column.
    """
    by_base_game = df.groupby('base_game_name')
    summary_columns = {}
    for metric in metrics:
        metric_by_game = by_base_game[metric]
        summary_columns[f'max_{metric}'] = metric_by_game.max()
        summary_columns[f'min_{metric}'] = metric_by_game.min()
    return pd.DataFrame(summary_columns)
    
# Colour scale shared by every heatmap: diverging around a ratio of 1, spanning 0.9 to 1.1.
cmap_norm = plt.matplotlib.colors.TwoSlopeNorm(vmin=0.9, vcenter=1, vmax=1.1)
# plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('RdBu').copy()
cmap.set_bad('magenta')  # colour used for invalid (e.g. NaN) cells

# One heatmap per non-base (value_structure, rule, deviations) group, showing each
# max/min metric divided by the same metric under the 'base' rule of that value structure.
for idx, grp_df in df_plt.groupby(['value_structure', 'rule', 'deviations']):
    if idx[1] == 'base':
        continue

    try:
        data_grp_df = make_data_dict(grp_df)
        normalizer_grp_df = df_plt_indexed.loc[(idx[0], 'base')]
        data_normalized_df = make_data_dict(normalizer_grp_df)
        # Compute the ratio matrix before opening a figure, so a data failure leaves no blank figure.
        data = (data_grp_df / data_normalized_df).values.T

        plt.figure(figsize=(12, 8))
        plt.imshow(data, cmap=cmap, norm=cmap_norm)
        plt.title(idx)
        plt.yticks(range(len(data_grp_df.columns)), data_grp_df.columns)
        plt.colorbar()
        plt.show()
    except Exception as e:
        # Best-effort: report which group failed and continue with the next one.
        print(idx, e)

In [None]:
# One swarm plot per outcome metric: base game on the x-axis, hue by tiebreaking policy.
for metric_name in ('auction_lengths', 'num_lotteries', 'unsold_licenses', 'total_revenue', 'total_welfare'):
    plt.figure(figsize=(12, 4))
    sns.swarmplot(
        data=df, x='base_game_name', y=metric_name,
        hue='tiebreaking_policy', dodge=True, size=4,
    )
    plt.gca()
    plt.legend([], [], frameon=False)  # replace the hue legend with an empty, frameless one
    plt.title(metric_name)
    plt.show()

In [None]:
# Swarm plot of auction lengths per base game, hue by tiebreaking policy.
plt.figure(figsize=(12, 4))
sns.swarmplot(
    data=df, x='base_game_name', y='auction_lengths',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# Swarm plot of the number of lotteries per base game, hue by tiebreaking policy.
plt.figure(figsize=(12, 4))
sns.swarmplot(
    data=df, x='base_game_name', y='num_lotteries',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# Swarm plot of unsold licenses per base game, hue by tiebreaking policy.
plt.figure(figsize=(12, 4))
sns.swarmplot(
    data=df, x='base_game_name', y='unsold_licenses',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# Swarm plot of total revenue per base game, hue by tiebreaking policy.
plt.figure(figsize=(12, 4))
sns.swarmplot(
    data=df, x='base_game_name', y='total_revenue',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# Swarm plot of total welfare per base game, hue by tiebreaking policy.
plt.figure(figsize=(12, 4))
sns.swarmplot(
    data=df, x='base_game_name', y='total_welfare',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# Total welfare per base game, restricted to runs whose value_structure is "quasi_linear".
g = sns.swarmplot(
    data=df[df['value_structure'] == "quasi_linear"],
    x='base_game_name', y='total_welfare',
    hue='tiebreaking_policy', dodge=True, size=4,
)
plt.gca()
plt.legend([], [], frameon=False)

In [None]:
# How to get the straightforward eval?
import traceback

# Build one summary record per run from the straightforward evaluation of its final checkpoint.
records = []
for run in tqdm(runs):
    game = run.game.load_as_spiel()
    record = {
        'run_name': run.name,
        'game_name': run.game.name,
        'seed': run.config.get('seed'),
        'config': run.get_config_name(),
        'alg': get_algorithm_from_run(run),
    }
    record.update(get_game_info(game, run.game))

    # Append before evaluating so failed runs still show up with no_error=False.
    record['no_error'] = False
    records.append(record)

    try:
        game, final_checkpoint, policy = get_results(run, load_policy=False)
    except Exception as e:
        print(f"Skipping run {run.name} because of error {e}")
        continue

    try:
        record['t'] = final_checkpoint.t
        # Fix: a trailing comma here used to store a 1-tuple instead of the value.
        record['walltime'] = run.walltime()
        evaluation = final_checkpoint.get_straightforward_eval()

        # Conv values may be null; every derived quantity falls back to NaN in that case.
        has_nash_conv = not pd.isnull(evaluation.nash_conv)
        has_heuristic_conv = not pd.isnull(evaluation.heuristic_conv)

        record['nash_conv'] = evaluation.nash_conv
        record['rewards'] = evaluation.mean_rewards
        record['nash_conv_frac'] = evaluation.nash_conv / sum(evaluation.mean_rewards) if has_nash_conv else np.nan
        record['heuristic_conv'] = evaluation.heuristic_conv
        record['heuristic_conv_frac'] = evaluation.heuristic_conv / sum(evaluation.mean_rewards) if has_heuristic_conv else np.nan

        for i in range(game.num_players()):
            record[f'rewards_{i}'] = evaluation.mean_rewards[i]
            record[f'nc_player_improvements_{i}'] = evaluation.nash_conv_player_improvements[i] if has_nash_conv else np.nan
            record[f'nc_player_improvements_frac_{i}'] = (evaluation.nash_conv_player_improvements[i] / evaluation.mean_rewards[i]) if has_nash_conv else np.nan

            record[f'hc_player_improvements_{i}'] = evaluation.heuristic_conv_player_improvements[i] if has_heuristic_conv else np.nan
            record[f'hc_player_improvements_frac_{i}'] = (evaluation.heuristic_conv_player_improvements[i] / evaluation.mean_rewards[i]) if has_heuristic_conv else np.nan

        record.update(**analyze_samples(evaluation.samples, game))

        record['no_error'] = True
    except Exception as e:
        # Best-effort: report the failure with its traceback and move on to the next run.
        print(f"Something wrong with {run}. Skipping. {e}")
        print(traceback.format_exc())

print(len(records))

In [None]:
# Count runs per game variant, split by whether they were processed without error.
df = pd.DataFrame.from_records(records)
with pd.option_context('display.max_rows', None):
    display(
        df.loc[:, ['value_structure', 'rule', 'base_game_name', 'deviations', 'no_error']]
        .value_counts()
        .sort_index()
    )

In [None]:
# One swarm plot per outcome metric: base game on the x-axis, hue by tiebreaking policy.
for metric_name in ('auction_lengths', 'num_lotteries', 'unsold_licenses', 'total_revenue', 'total_welfare'):
    plt.figure(figsize=(12, 4))
    sns.swarmplot(
        data=df, x='base_game_name', y=metric_name,
        hue='tiebreaking_policy', dodge=True, size=4,
    )
    plt.gca()
    plt.legend([], [], frameon=False)  # replace the hue legend with an empty, frameless one
    plt.title(metric_name)
    plt.show()

In [None]:
# Display the NashConv column as read from the stored evaluation; per the
# author's note, the value would still need to be computed for this evaluation.
df['nash_conv'] # Would need to compute