In [4]:
import pandas as pd
import pickle
import numpy as np

In [5]:
def _summarise_column(df, column, prefix):
    """Return ``{prefix}_mean`` and ``{prefix}_std_error`` for ``df[column]``.

    The standard error is the sample standard deviation divided by the
    square root of the number of rows in ``df``.
    """
    values = df[column]
    return {
        prefix + '_mean': values.mean(),
        prefix + '_std_error': values.std() / np.sqrt(len(df)),
    }


def process(res, df, hps=None, datatype=None, first=None):
    """Summarise one result frame and store the summary in ``res``.

    Parameters
    ----------
    res : dict
        Accumulator keyed by ``(method_key, dataset_key)``. It is updated in
        place and also returned. An existing entry for the same key is
        overwritten.
    df : pandas.DataFrame
        Result frame with ``status``, ``method_key``, ``dataset_key`` and
        ``checksum`` columns, a bias column (``bias``, else ``bias_l2_h1``)
        and a time column (``time1``, else ``time``). ``method_key`` and
        ``dataset_key`` must each hold a single distinct value.
    hps : optional
        Unused; kept so existing callers keep working.
    datatype : str, optional
        When given, frames whose lower-cased ``dataset_key`` does not contain
        this substring are skipped.
    first : int, optional
        When given, only the first ``first`` rows are summarised. If the frame
        has fewer rows, all of them are used.

    Returns
    -------
    dict
        ``res``. It is returned unchanged when any row has status ``'Locked'``
        or the frame is filtered out by ``datatype``.

    Raises
    ------
    AssertionError
        If any row has status ``'Failed'`` or the frame mixes several methods
        or datasets.
    """
    assert (df.status == 'Failed').sum() == 0, 'result frame contains Failed rows'
    if (df.status == 'Locked').sum() > 0:
        # At least one row is still marked Locked: skip the whole frame.
        return res
    assert len(np.unique(df['dataset_key'])) == 1, 'expected a single dataset_key'
    assert len(np.unique(df['method_key'])) == 1, 'expected a single method_key'
    method_key = df['method_key'].iloc[0]
    dataset_key = df['dataset_key'].iloc[0]
    if datatype is not None and datatype not in dataset_key.lower():
        return res
    if first is not None:
        # A positional slice tolerates ``first`` larger than the frame, where
        # indexing with ``np.arange(first)`` raised IndexError. Clamping at 0
        # keeps the empty selection a non-positive ``first`` used to give.
        df = df.iloc[:max(first, 0)]
    bias_col = 'bias' if 'bias' in df.columns else 'bias_l2_h1'
    time_col = 'time1' if 'time1' in df.columns else 'time'
    summary = {}
    summary.update(_summarise_column(df, bias_col, 'bias'))
    summary.update(_summarise_column(df, time_col, 'time'))
    summary.update(_summarise_column(df, 'checksum', 'checksum'))
    # Only the first row's value is kept for compression_kwargs.
    summary['compression_kwargs'] = (
        df['compression_kwargs'].iloc[0] if 'compression_kwargs' in df.columns else None
    )
    res[(method_key, dataset_key)] = summary
    return res

In [6]:
def process_dust(res, filenames, datatype='categorical', first=None):
    """Pool result CSVs by (method, dataset) and store summary statistics.

    Each file is read with its first column as the index and must contain no
    rows with status ``'Failed'`` or ``'Locked'``. The method and dataset
    names are the text before the first ``'|'`` of the first row's
    ``method_key`` / ``dataset_key``, stripped of surrounding whitespace.
    Files sharing the same pair are concatenated before the mean and standard
    error of ``bias_h1``, ``imbalance`` and ``checksum`` are computed.

    ``datatype`` (when not ``None``) keeps only datasets whose lower-cased
    name contains it. ``first`` is accepted but not used.

    ``res`` is updated in place and returned.
    """
    frames_by_key = {}
    for path in tqdm.tqdm(filenames):
        frame = pd.read_csv(path, index_col=0)
        assert not (frame.status == 'Failed').any()
        assert not (frame.status == 'Locked').any()
        method_name = frame['method_key'].iloc[0].split('|')[0].strip()
        dataset_name = frame['dataset_key'].iloc[0].split('|')[0].strip()
        if datatype is None or datatype in dataset_name.lower():
            frames_by_key.setdefault((method_name, dataset_name), []).append(frame)

    # (output prefix, source column) pairs, in the order the keys are emitted.
    summarised = (('bias', 'bias_h1'), ('imbalance', 'imbalance'), ('checksum', 'checksum'))
    for key in tqdm.tqdm(frames_by_key):
        pooled = pd.concat(frames_by_key[key])
        root_n = np.sqrt(len(pooled))
        stats = {}
        for prefix, column in summarised:
            values = pooled[column]
            stats[prefix + '_mean'] = values.mean()
            stats[prefix + '_std_error'] = values.std() / root_n
        res[key] = stats
    return res

In [7]:
import json
import glob
import tqdm
# Collect per-(method, dataset) summaries from every result CSV.
# `process` fills `res` in place, so its return value is not needed here.
res = {}
for pattern in (
    '../outputs/tune_transportability_tbi/*.csv',
    '../outputs/tune_ate_ihdp_news/*.csv',
):
    # One progress bar per output directory.
    for filename in tqdm.tqdm(glob.glob(pattern)):
        process(res, pd.read_csv(filename, index_col=0))

100%|███████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 35.07it/s]
100%|███████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 47.66it/s]


import os
# Destructive clean-up: deletes every path listed in `wrong_filenames` from disk.
# NOTE(review): `wrong_filenames` is not defined in the visible cells; confirm
# where it comes from before running this cell.
for filename in wrong_filenames:
    os.remove(filename)
    # os.remove returns None, so report the deleted path instead of its result.
    print(f'removed {filename}')

In [8]:
# For each dataset found in `res`: build a table of all methods sorted by mean
# bias, display it in full, and keep its bias column in `pareto_df`.
pareto_df = {}
union = set()
intersection = None
dataset_keys = np.unique([key[1] for key in res])
hyperparams_dict = {}
hyperparams_datasets_dict = {}
compression_kwargs_list = []
for dataset_key in dataset_keys:
    res_agg = []
    print(dataset_key)
    for tup, dic in res.items():
        if tup[1] == dataset_key:
            method_key = tup[0]
            # Build the row from a copy so the summaries stored in `res` are
            # not modified by this cell; 'method' stays the last column.
            res_agg.append({**dic, 'method': method_key})
    res_agg_df = pd.DataFrame(res_agg).sort_values('bias_mean').set_index('method')
    # Lift pandas' display limits so no row or column of the table is elided.
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(res_agg_df)
    # Keep only the bias column, renamed so each dataset's column is distinct.
    pareto_df[dataset_key] = res_agg_df[['bias_mean']].rename(columns={'bias_mean': 'bias ' + dataset_key})

IHDP


Unnamed: 0_level_0,bias_mean,bias_std_error,time_mean,time_std_error,checksum_mean,checksum_std_error,compression_kwargs
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"KB (linear, lmb0.0001)",0.075722,0.010897,0.503429,0.003136,152.050684,1.913948,
"PCA+KB (linear, lmb0.0001) (k10)",0.07672,0.012889,0.576419,0.005293,152.050684,1.913948,{'k': 10}
"KB (energy, lmb0.0001)",0.07834,0.013932,0.422854,0.002778,152.050684,1.913948,
"ABS+KB (energy, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.079103,0.011119,1.081667,0.1284,152.050684,1.913948,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
"PCA+KB (energy, lmb0.0001) (k10)",0.080104,0.013608,0.464187,0.014519,152.050684,1.913948,{'k': 10}
EntB,0.086858,0.012919,0.092425,0.005739,152.050684,1.913948,
"ABS+KB (linear, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.087054,0.008757,1.026811,0.046156,152.050684,1.913948,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
"PS+KB (energy, lmb0.0001) (GradientBoostingClassifier, logitsFalse)",0.095751,0.011707,0.470884,0.001537,152.050684,1.913948,"{'model': 'GradientBoostingClassifier', 'logit..."
"PS+KB (linear, lmb0.0001) (GradientBoostingClassifier, logitsFalse)",0.105346,0.016507,0.594775,0.005347,152.050684,1.913948,"{'model': 'GradientBoostingClassifier', 'logit..."
"IPW ('propensity', 'GradientBoostingClassifier')",0.113737,0.024491,0.179455,0.009741,152.050684,1.913948,


News


Unnamed: 0_level_0,bias_mean,bias_std_error,time_mean,time_std_error,checksum_mean,checksum_std_error,compression_kwargs
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"NSM+KB (energy, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.069581,0.012522,32.480023,0.56338,2443.516178,26.318289,"{'fitter_class': 'NSMBalancingScoreFitter', 'm..."
"NSM+KB (linear, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.113427,0.01755,34.527188,3.250757,2443.516178,26.318289,"{'fitter_class': 'NSMBalancingScoreFitter', 'm..."
"ABS+KB (linear, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.122745,0.013082,36.825392,3.992654,2443.516178,26.318289,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
"ABS+KB (energy, lmb0.0001) (k10 neural_different_outputs 10 100000 True 0.01 [200] [200] 0)",0.128087,0.014249,32.093979,1.123756,2443.516178,26.318289,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
"KB (linear, lmb0.0001)",0.168053,0.011301,28.872797,0.238013,2443.516178,26.318289,
EntB,0.221212,0.020049,3366.110849,62.757551,2443.516178,26.318289,
"IPW ('propensity', 'GradientBoostingClassifier')",0.27958,0.018341,61.743125,0.456638,2443.516178,26.318289,
"PCA+KB (energy, lmb0.0001) (k10)",0.313827,0.020414,27.071374,0.28368,2443.516178,26.318289,{'k': 10}
"PCA+KB (linear, lmb0.0001) (k10)",0.321202,0.022506,26.067451,0.295069,2443.516178,26.318289,{'k': 10}
"PS+KB (energy, lmb0.0001) (GradientBoostingClassifier, logitsFalse)",0.380761,0.026448,83.017999,0.588503,2443.516178,26.318289,"{'model': 'GradientBoostingClassifier', 'logit..."


TBI


Unnamed: 0_level_0,bias_mean,bias_std_error,time_mean,time_std_error,checksum_mean,checksum_std_error,compression_kwargs
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"IPW ('density_ratio', 'GradientBoostingClassifier')",2.27534,0.180551,0.333286,0.002889,537.920084,1.580147,
"ABS+KB (energy, lmb0.0001) (k10 neural_pq 10 100000 True 0.01 [200] [200] 0)",4.997162,0.370863,1.958905,0.250705,537.920084,1.580147,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
"NSM+KB (energy, lmb0.0001) (k10 neural_pq 10 100000 True 0.01 [200] [200] 0)",5.397291,0.531452,2.180909,0.06488,537.920084,1.580147,"{'fitter_class': 'NSMBalancingScoreFitter', 'm..."
EntB,7.627308,0.59795,0.300693,0.008601,537.920084,1.580147,
unweighted,7.666772,0.146077,0.07501,0.000559,537.920084,1.580147,
"PS+KB (energy, lmb0.0001) (GradientBoostingClassifier, logitsFalse)",7.790955,0.53397,1.284434,0.017502,537.920084,1.580147,"{'model': 'GradientBoostingClassifier', 'logit..."
"PCA+KB (energy, lmb0.0001) (k10)",10.646464,0.817635,1.740465,0.021997,537.920084,1.580147,{'k': 10}
"KB (energy, lmb0.0001)",10.68738,0.82775,1.570706,0.020249,537.920084,1.580147,
"PS+KB (linear, lmb0.0001) (GradientBoostingClassifier, logitsFalse)",13.217893,1.150271,1.380788,0.015993,537.920084,1.580147,"{'model': 'GradientBoostingClassifier', 'logit..."
"ABS+KB (linear, lmb0.0001) (k10 neural_pq 10 100000 True 0.01 [200] [200] 0)",18.503059,1.609989,2.259091,0.057301,537.920084,1.580147,{'fitter_class': 'AutoDMLBalancingScoreFitter'...
