In [1]:
import numpy as np
import pandas as pd
import re
import json
import math
import sys
import os


In [2]:
# Root directory of the baseline runs; the trailing slash is part of the value
# because some cells concatenate sub-paths directly onto it.
root = '../src/baselines/runs/'

# Grid-search run folders located under `root`.
models = [
    'bert_oc_gs',
    'bert_soc_gs',
]

# Dataset names used to build the per-dataset result file names.
datasets = [
    'davidson',
    'founta',
    'golbeck',
    'harassment',
    'hate',
]

In [40]:
# gridsearch aggregate
# Build one row per (model, run directory, dataset) that has a dev evaluation
# file. Each row holds the "key = value" metrics from that file, the entries of
# the run's args.json, and two tags naming the model and the dataset.
run_rows = []
for model in models:
    model_dir = os.path.join(root, model)
    for dataset in datasets:
        # sorted() keeps the row order reproducible; os.listdir() order is arbitrary.
        for subdir in sorted(os.listdir(model_dir)):
            eval_path = os.path.join(model_dir, subdir, f'eval_results_0_dev_{dataset}.txt')
            if not os.path.exists(eval_path):
                continue
            metrics = pd.read_csv(
                filepath_or_buffer=eval_path,
                delimiter='=',
                # names=['metric','score'],
                header=None,
            )
            args = pd.read_json(os.path.join(model_dir, subdir, 'args.json'), typ='series')
            args = args.to_frame().reset_index().rename(columns={"index": 0, 0: 1})
            # model[:-3] drops the trailing '_gs' of the run-folder name.
            tags = pd.DataFrame([['model', model[:-3]], ['dataset', dataset]])
            # DataFrame.append no longer exists in pandas 2.x; a single concat
            # stacks the same key/value rows in the same order.
            stacked = pd.concat([metrics, args, tags], ignore_index=True)
            # Keys become column labels, values become the single data row.
            run_rows.append(stacked.set_index(0).transpose())

if not run_rows:
    raise FileNotFoundError(
        f'no eval_results_0_dev_<dataset>.txt file found under {root} for models {models}'
    )

# One concat at the end instead of growing the frame inside the loop.
result = pd.concat(run_rows, ignore_index=True, sort=False)

gs_results_dir = os.path.join(root, 'gs_results')
os.makedirs(gs_results_dir, exist_ok=True)
result.to_csv(os.path.join(gs_results_dir, 'gridsearch_expl_results_raw.csv'), index=False)

In [41]:
# Display the aggregated grid-search table (one row per evaluation file found).
result


Unnamed: 0,acc,auc_roc,disparate_impact_favorable_06,disparate_impact_favorable_08,disparate_impact_unfavorable_06,disparate_impact_unfavorable_08,eval_loss,eval_loss_reg,f1,fnr_priv_06,...,local_rank,seed,gradient_accumulation_steps,fp16,loss_scale,server_ip,server_port,continue_from_checkpoint,model,dataset
0,0.93866,0.972684,0.236528,0.426022,1.26339,1.16326,0.168889,4.01722e-05,0.961771,0.0704762,...,-1,42,1,False,0,,,0,bert_oc,davidson
1,0.94431,0.97097,0.121867,0.0864928,1.30454,1.25488,0.158455,0.000468094,0.965396,0.0692063,...,-1,42,1,False,0,,,0,bert_oc,davidson
2,0.957627,0.981972,0.130753,0.0926209,1.2745,1.23213,0.13663,0.000138618,0.9739,0.0495238,...,-1,42,1,False,0,,,0,bert_oc,davidson
3,0.964487,0.987828,0.0964078,0.103284,1.25042,1.20043,0.100869,0.00132101,0.9784,0.0285714,...,-1,42,1,False,0,,,0,bert_oc,davidson
4,0.961259,0.986236,0.131026,0.0928088,1.27366,1.23149,0.107736,0.00142147,0.976143,0.0463492,...,-1,42,1,False,0,,,0,bert_oc,davidson
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,0.939866,0.891043,1.02129,1.02923,0.292957,0,0.166691,0.00137342,0.389844,0.71875,...,-1,42,1,False,0,,,0,bert_soc,hate
81,0.940189,0.890779,1.02529,1.0313,0.21853,0,0.167264,0.00150361,0.404819,0.701923,...,-1,42,1,False,0,,,0,bert_soc,hate
82,0.94148,0.893815,1.01529,1.01515,0.509598,0.503864,0.164127,0.00110855,0.413905,0.699519,...,-1,42,1,False,0,,,0,bert_soc,hate
83,0.941561,0.895321,1.02177,1.03329,0.362816,0,0.164603,0.00127491,0.429022,0.679087,...,-1,42,1,False,0,,,0,bert_soc,hate


In [42]:
# Scratch check: slicing off the last three characters strips the '_gs' suffix.
'bert_soc_gs'[:-3]

'bert_soc'

In [43]:
# Write one CSV per (model, dataset) pair, ordered by the 'f1 ' column, highest first.
for model in models:
    for dataset in datasets:
        same_dataset = result['dataset'] == dataset
        # The 'model' column stores the run-folder name without its '_gs' suffix.
        same_model = result['model'] == model[:-3]
        temp = result[same_dataset & same_model].sort_values(by=['f1 '], ascending=False)
        temp.to_csv(f'{root}/gs_results/{model[:-3]}_{dataset}.csv', index=False)

In [44]:
# Re-read every per-(model, dataset) CSV and keep only its first row, then
# stack those rows and save them as the best-hyperparameter table.
top_rows = []
for model in models:
    for dataset in datasets:
        ranked = pd.read_csv(f'{root}/gs_results/{model[:-3]}_{dataset}.csv')
        top_rows.append(ranked.head(1))
# Original row labels are kept, so every row carries index 0.
best_results_gs = pd.concat(top_rows)
best_results_gs.to_csv(f'{root}/gs_results/best_hp_results_expl.csv', index=False)

In [45]:
# Display the first (top-ranked) row kept for each (model, dataset) pair.
best_results_gs

Unnamed: 0,acc,auc_roc,disparate_impact_favorable_06,disparate_impact_favorable_08,disparate_impact_unfavorable_06,disparate_impact_unfavorable_08,eval_loss,eval_loss_reg,f1,fnr_priv_06,...,local_rank,seed,gradient_accumulation_steps,fp16,loss_scale,server_ip,server_port,continue_from_checkpoint,model,dataset
0,0.967312,0.988467,0.13591,0.103051,1.237363,1.201032,0.096138,0.001522,0.980113,0.026032,...,-1,42,1,False,0,,,0,bert_oc,davidson
0,0.939206,0.972975,0.185922,0.137582,3.260271,3.294739,0.177459,0.002693,0.889416,0.118084,...,-1,42,1,False,0,,,0,bert_oc,founta
0,0.791582,0.725867,0.934984,1.068872,1.948413,0.0,0.48002,0.000165,0.318408,0.798301,...,-1,42,1,False,0,,,0,bert_oc,golbeck
0,0.903094,0.955676,0.083677,0.059509,2.810277,2.658547,0.257533,0.0011,0.872017,0.175424,...,-1,42,1,False,0,,,0,bert_oc,harassment
0,0.940189,0.889739,1.022384,1.024954,0.135665,0.0,0.167939,0.001446,0.367208,0.74399,...,-1,42,1,False,0,,,0,bert_oc,hate
0,0.970137,0.989351,0.12562,0.10282,1.241612,1.201639,0.104425,0.000818,0.981827,0.024762,...,-1,42,1,False,0,,,0,bert_soc,davidson
0,0.93888,0.972734,0.176721,0.138058,3.257554,3.26481,0.176269,0.002505,0.889327,0.114368,...,-1,42,1,False,0,,,0,bert_soc,founta
0,0.78499,0.725321,0.583135,1.168346,3.507143,0.0,0.515957,0.000545,0.442105,0.649682,...,-1,42,1,False,0,,,0,bert_soc,golbeck
0,0.908895,0.959597,0.086479,0.019939,2.775356,2.703913,0.245078,0.000653,0.880219,0.162342,...,-1,42,1,False,0,,,0,bert_soc,harassment
0,0.941561,0.895321,1.02177,1.033291,0.362816,0.0,0.164603,0.001275,0.429022,0.679087,...,-1,42,1,False,0,,,0,bert_soc,hate


In [66]:
# Load the metrics of a single run folder (glove_davidson_0) and check dtypes
# before and after reshaping it into a one-row frame.
f = pd.read_csv(
    filepath_or_buffer=f'{root}log_reg_results/glove_davidson_0/eval_results_dev_glove',
    delimiter='=',
    header=None,
)
f[1] = f[1].astype(float)
print(f.dtypes)

# Append the run's args.json entries as extra key/value rows, then pivot so
# that keys become column labels.
args = pd.read_json(f'{root}log_reg_results/glove_davidson_0/args.json', typ='series', dtype=str)
args = args.to_frame().reset_index().rename(columns={"index": 0, 0: 1})
f = pd.concat([f, args], ignore_index=True).set_index(0).transpose()

# Columns cast back to float after the pivot (labels keep their trailing space).
float_cols = [
    'acc ', 'auc_roc ', 'disparate_impact_favorable_06 ',
    'disparate_impact_favorable_08 ', 'disparate_impact_unfavorable_06 ',
    'disparate_impact_unfavorable_08 ', 'f1 ', 'fnr_priv_06 ',
    'fnr_priv_08 ', 'fnr_total_06 ', 'fnr_total_08 ', 'fnr_unpriv_06 ',
    'fnr_unpriv_08 ', 'fpr_priv_06 ', 'fpr_priv_08 ', 'fpr_total_06 ',
    'fpr_total_08 ', 'fpr_unpriv_06 ', 'fpr_unpriv_08 ', 'loss ',
    'precision ', 'priv_n_06 ', 'priv_n_08 ', 'priv_ratio_favorable_06 ',
    'priv_ratio_favorable_08 ', 'priv_ratio_unfavorable_06 ',
    'priv_ratio_unfavorable_08 ', 'priv_total_06 ', 'priv_total_08 ',
    'recall ', 'unpriv_n_06 ', 'unpriv_n_08 ', 'unpriv_ratio_favorable_06 ',
    'unpriv_ratio_favorable_08 ', 'unpriv_ratio_unfavorable_06 ',
    'unpriv_ratio_unfavorable_08 ', 'unpriv_total_06 ', 'unpriv_total_08 ',
]
f[float_cols] = f[float_cols].astype(float)

print(f.dtypes)

0     object
1    float64
dtype: object
0
acc                                 float64
auc_roc                             float64
disparate_impact_favorable_06       float64
disparate_impact_favorable_08       float64
disparate_impact_unfavorable_06     float64
disparate_impact_unfavorable_08     float64
f1                                  float64
fnr_priv_06                         float64
fnr_priv_08                         float64
fnr_total_06                        float64
fnr_total_08                        float64
fnr_unpriv_06                       float64
fnr_unpriv_08                       float64
fpr_priv_06                         float64
fpr_priv_08                         float64
fpr_total_06                        float64
fpr_total_08                        float64
fpr_unpriv_06                       float64
fpr_unpriv_08                       float64
loss                                float64
precision                           float64
priv_n_06                         

In [21]:
# Peek at the raw "key = value" test metrics of one run (founta_1).
f = pd.read_csv(
    f'{root}bert_oc_results/founta_1/eval_results_0_test_founta.txt',
    sep='=',
    header=None,
)
f

Unnamed: 0,0,1
0,acc,0.9419312744671596
1,auc_roc,0.9709648815613618
2,disparate_impact_favorable_06,0.1748234242977409
3,disparate_impact_favorable_08,0.1967497430626927
4,disparate_impact_unfavorable_06,3.269320793484362
5,disparate_impact_unfavorable_08,3.129235484420228
6,eval_loss,0.1761298927761938
7,eval_loss_reg,0.0011569487572533
8,f1,0.8935831008369869
9,fnr_priv_06,0.1052192066805845


In [26]:
# Swap the literal ' None' placeholder for 0 so the value column can be cast to float.
f = f.replace(' None', 0)
f[1] = f[1].astype(float)

In [3]:
# seeds aggregate
# Collect one row per dev / test evaluation file found in the run folders under
# bert_oc_results, cast the metric columns to float, and save both raw tables.

# Metric columns cast to float after stacking (labels keep the trailing space
# that precedes '=' in the result files).
METRIC_COLUMNS = [
    'acc ', 'auc_roc ', 'disparate_impact_favorable_06 ',
    'disparate_impact_favorable_08 ', 'disparate_impact_unfavorable_06 ',
    'disparate_impact_unfavorable_08 ', 'f1 ', 'fnr_priv_06 ',
    'fnr_priv_08 ', 'fnr_total_06 ', 'fnr_total_08 ', 'fnr_unpriv_06 ',
    'fnr_unpriv_08 ', 'fpr_priv_06 ', 'fpr_priv_08 ', 'fpr_total_06 ',
    'fpr_total_08 ', 'fpr_unpriv_06 ', 'fpr_unpriv_08 ', 'loss ',
    'precision ', 'priv_n_06 ', 'priv_n_08 ', 'priv_ratio_favorable_06 ',
    'priv_ratio_favorable_08 ', 'priv_ratio_unfavorable_06 ',
    'priv_ratio_unfavorable_08 ', 'priv_total_06 ', 'priv_total_08 ',
    'recall ', 'unpriv_n_06 ', 'unpriv_n_08 ', 'unpriv_ratio_favorable_06 ',
    'unpriv_ratio_favorable_08 ', 'unpriv_ratio_unfavorable_06 ',
    'unpriv_ratio_unfavorable_08 ', 'unpriv_total_06 ', 'unpriv_total_08 ',
]


def _load_run_row(results_path, args_path):
    """Return a one-row DataFrame for a single evaluation file.

    Parameters
    ----------
    results_path : str
        Path to a "key = value" metrics file.
    args_path : str
        Path to the run's args.json.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are the metric keys followed by the args.json keys.
    """
    metrics = pd.read_csv(filepath_or_buffer=results_path, delimiter='=', header=None)
    # NOTE(review): a missing metric (' None') becomes 0 and is later averaged
    # by the per-task aggregation - confirm 0 rather than NaN is intended.
    metrics = metrics.replace(' None', 0)
    metrics[1] = metrics[1].astype(float)
    args = pd.read_json(args_path, typ='series', dtype=str)
    args = args.to_frame().reset_index().rename(columns={"index": 0, 0: 1})
    # Keys become column labels, values become the single data row.
    return pd.concat([metrics, args], ignore_index=True).set_index(0).transpose()


pattern_dev = 'eval_results_0_dev'
pattern_test = 'eval_results_0_test'

runs_dir = os.path.join(root, 'bert_oc_results')
val_rows = []
test_rows = []
# sorted() keeps the row order reproducible; os.listdir() order is arbitrary.
for subdir in sorted(os.listdir(runs_dir)):
    run_dir = os.path.join(runs_dir, subdir)
    if not os.path.isdir(run_dir):
        # Skip stray files next to the run folders instead of failing in listdir.
        continue
    args_path = os.path.join(run_dir, 'args.json')
    for file in sorted(os.listdir(run_dir)):
        if re.match(pattern_dev, file):
            val_rows.append(_load_run_row(os.path.join(run_dir, file), args_path))
        elif re.match(pattern_test, file):
            test_rows.append(_load_run_row(os.path.join(run_dir, file), args_path))

if not val_rows or not test_rows:
    raise FileNotFoundError(
        f'expected both {pattern_dev}* and {pattern_test}* files under {runs_dir}'
    )

# One concat per split instead of growing the frames inside the loop.
result_val = pd.concat(val_rows, ignore_index=True, sort=False)
result_test = pd.concat(test_rows, ignore_index=True, sort=False)

# The transpose leaves every column as object dtype; restore floats for the metrics.
result_test[METRIC_COLUMNS] = result_test[METRIC_COLUMNS].astype(float)
result_val[METRIC_COLUMNS] = result_val[METRIC_COLUMNS].astype(float)

stats_dir = os.path.join(root, 'bert_expl_stats')
result_val.to_csv(os.path.join(stats_dir, 'oc_val_results_raw.csv'), index=False)
result_test.to_csv(os.path.join(stats_dir, 'oc_test_results_raw.csv'), index=False)


In [4]:
# Display the stacked test-set rows (one row per test evaluation file found).
result_test

Unnamed: 0,acc,auc_roc,disparate_impact_favorable_06,disparate_impact_favorable_08,disparate_impact_unfavorable_06,disparate_impact_unfavorable_08,eval_loss,eval_loss_reg,f1,fnr_priv_06,...,warmup_proportion,no_cuda,local_rank,seed,gradient_accumulation_steps,fp16,loss_scale,server_ip,server_port,continue_from_checkpoint
0,0.966115,0.987726,0.103887,0.0,1.239538,1.21649,0.0999714,0.00195325,0.979532,0.026854,...,0.1,False,-1,0,1,False,0,,,0
1,0.967326,0.987629,0.098906,0.0,1.256452,1.22955,0.0964696,0.00170301,0.980162,0.032609,...,0.1,False,-1,1,1,False,0,,,0
2,0.965309,0.987645,0.1029,0.0,1.242724,1.218956,0.096135,0.00170339,0.979024,0.028772,...,0.1,False,-1,2,1,False,0,,,0
3,0.964905,0.987033,0.097569,0.0,1.261369,1.233333,0.0976723,0.00068454,0.978661,0.036445,...,0.1,False,-1,3,1,False,0,,,0
4,0.965712,0.988146,0.104639,0.0,1.237159,1.214646,0.101506,0.0051132,0.979304,0.026215,...,0.1,False,-1,4,1,False,0,,,0
5,0.966519,0.986875,0.092665,0.0,1.248465,1.220812,0.0953438,0.00208066,0.979741,0.028772,...,0.1,False,-1,5,1,False,0,,,0
6,0.963292,0.986479,0.100746,0.0,1.249954,1.224542,0.0974008,0.00114924,0.977756,0.033248,...,0.1,False,-1,6,1,False,0,,,0
7,0.966519,0.987424,0.092665,0.0,1.248465,1.220812,0.097387,0.00125022,0.979741,0.028772,...,0.1,False,-1,7,1,False,0,,,0
8,0.966922,0.988082,0.108392,0.0,1.252265,1.228922,0.0980797,0.00166617,0.979922,0.032609,...,0.1,False,-1,8,1,False,0,,,0
9,0.966922,0.988119,0.094669,0.0,1.241267,1.21526,0.0974377,0.00107183,0.980029,0.026215,...,0.1,False,-1,9,1,False,0,,,0


In [5]:
# Display the stacked dev-set rows (one row per dev evaluation file found).
result_val

Unnamed: 0,acc,auc_roc,disparate_impact_favorable_06,disparate_impact_favorable_08,disparate_impact_unfavorable_06,disparate_impact_unfavorable_08,eval_loss,eval_loss_reg,f1,fnr_priv_06,...,warmup_proportion,no_cuda,local_rank,seed,gradient_accumulation_steps,fp16,loss_scale,server_ip,server_port,continue_from_checkpoint
0,0.965295,0.987758,0.106294,0.103284,1.246953,1.200426,0.10094,0.00204131,0.978891,0.027937,...,0.1,False,-1,0,1,False,0,,,0
1,0.964487,0.987087,0.096408,0.103284,1.250421,1.200426,0.099727,0.000982749,0.9784,0.028571,...,0.1,False,-1,1,1,False,0,,,0
2,0.966505,0.98793,0.104129,0.101227,1.254156,1.205902,0.0993027,0.00160624,0.979582,0.029841,...,0.1,False,-1,2,1,False,0,,,0
3,0.96368,0.987147,0.105321,0.10236,1.250144,1.202854,0.0998649,0.00103324,0.977887,0.030476,...,0.1,False,-1,3,1,False,0,,,0
4,0.966102,0.987733,0.126203,0.103284,1.24003,1.200426,0.101016,0.000921663,0.979381,0.027302,...,0.1,False,-1,4,1,False,0,,,0
5,0.964084,0.987243,0.124755,0.102131,1.243994,1.203462,0.106123,0.00130386,0.978127,0.029841,...,0.1,False,-1,5,1,False,0,,,0
6,0.965295,0.987755,0.103894,0.101004,1.254961,1.206514,0.0975835,0.000807614,0.978839,0.031111,...,0.1,False,-1,6,1,False,0,,,0
7,0.964487,0.987062,0.131339,0.099683,1.249307,1.210196,0.105875,0.00120875,0.978314,0.033016,...,0.1,False,-1,7,1,False,0,,,0
8,0.964084,0.986975,0.103195,0.100339,1.257384,1.208352,0.099333,0.000871331,0.978084,0.033016,...,0.1,False,-1,8,1,False,0,,,0
9,0.964891,0.988663,0.104129,0.101227,1.254156,1.205902,0.0984128,0.00103632,0.978598,0.031111,...,0.1,False,-1,9,1,False,0,,,0


In [6]:
def ci_sample(series):
    """Return a normal-approximation confidence interval for the mean of `series`.

    The interval is ``mean +/- 1.96 * s / sqrt(n)``, where ``s`` is the sample
    standard deviation (ddof=1) and ``n`` the number of non-missing values.
    This is the same quantity the previous ``std(ddof=0) / sqrt(n - 1)`` form
    produced for complete data. Missing values are now excluded from ``n`` as
    well, matching how ``mean`` and ``std`` already ignore them.

    Parameters
    ----------
    series : pandas.Series or array-like
        Numeric observations (e.g. one metric across seeds).

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds. ``(nan, nan)`` when fewer than two
        non-missing observations are available, since the spread is undefined.
    """
    observed = pd.Series(series).dropna()
    n_obs = len(observed)
    if n_obs < 2:
        return float('nan'), float('nan')
    center = observed.mean()
    # 1.96 is the two-sided 95% normal quantile.
    # NOTE(review): with few seeds a Student-t quantile would give a wider interval.
    half_width = 1.96 * observed.std(ddof=1) / math.sqrt(n_obs)
    return center - half_width, center + half_width

In [7]:
# Check dtypes: the explicitly cast metric columns should be float64, the args.json columns object.
result_val.dtypes


0
acc                                 float64
auc_roc                             float64
disparate_impact_favorable_06       float64
disparate_impact_favorable_08       float64
disparate_impact_unfavorable_06     float64
                                     ...   
fp16                                 object
loss_scale                           object
server_ip                            object
server_port                          object
continue_from_checkpoint             object
Length: 96, dtype: object

In [8]:
# List the column labels; metric names carry a trailing space (e.g. 'acc ').
result_val.columns.values

array(['acc ', 'auc_roc ', 'disparate_impact_favorable_06 ',
       'disparate_impact_favorable_08 ',
       'disparate_impact_unfavorable_06 ',
       'disparate_impact_unfavorable_08 ', 'eval_loss ', 'eval_loss_reg ',
       'f1 ', 'fnr_priv_06 ', 'fnr_priv_08 ', 'fnr_total_06 ',
       'fnr_total_08 ', 'fnr_unpriv_06 ', 'fnr_unpriv_08 ',
       'fpr_priv_06 ', 'fpr_priv_08 ', 'fpr_total_06 ', 'fpr_total_08 ',
       'fpr_unpriv_06 ', 'fpr_unpriv_08 ', 'global_step ', 'loss ',
       'precision ', 'priv_n_06 ', 'priv_n_08 ',
       'priv_ratio_favorable_06 ', 'priv_ratio_favorable_08 ',
       'priv_ratio_unfavorable_06 ', 'priv_ratio_unfavorable_08 ',
       'priv_total_06 ', 'priv_total_08 ', 'recall ', 'unpriv_n_06 ',
       'unpriv_n_08 ', 'unpriv_ratio_favorable_06 ',
       'unpriv_ratio_favorable_08 ', 'unpriv_ratio_unfavorable_06 ',
       'unpriv_ratio_unfavorable_08 ', 'unpriv_total_06 ',
       'unpriv_total_08 ', 'raw_data_path', 'data_dir', 'label_groups',
       'gab_la

In [9]:
# Per-task summary of the dev-set metrics across runs: mean, std and the
# ci_sample interval for every score, plus the median group sizes.
# Aggregations are passed by name ('mean' / 'std' / 'median') rather than as
# NumPy callables. pandas has been remapping np.mean / np.std / np.median to
# its own group-by implementations (so np.std already meant the sample std,
# ddof=1), but that remapping is deprecated; using the names keeps the numbers
# stable and makes all columns consistent.
summary_metrics = [
    'acc', 'auc_roc', 'f1', 'precision', 'recall',
    'disparate_impact_favorable_06', 'disparate_impact_favorable_08',
    'disparate_impact_unfavorable_06', 'disparate_impact_unfavorable_08',
    'fnr_priv_06', 'fnr_priv_08', 'fnr_total_06', 'fnr_total_08',
    'fnr_unpriv_06', 'fnr_unpriv_08',
    'fpr_priv_06', 'fpr_priv_08', 'fpr_total_06', 'fpr_total_08',
    'fpr_unpriv_06', 'fpr_unpriv_08',
]
group_size_columns = ['priv_n_06', 'priv_n_08', 'unpriv_n_06', 'unpriv_n_08']

# Insertion order of the dict fixes the output column order:
# <metric>_mean, <metric>_std, <metric>_ci for each metric, then the group sizes.
dev_agg_spec = {}
for metric in summary_metrics:
    source_column = f'{metric} '  # source labels keep a trailing space
    dev_agg_spec[f'{metric}_mean'] = (source_column, 'mean')
    dev_agg_spec[f'{metric}_std'] = (source_column, 'std')
    dev_agg_spec[f'{metric}_ci'] = (source_column, ci_sample)
for size_column in group_size_columns:
    dev_agg_spec[size_column] = (f'{size_column} ', 'median')

temp_dev = (
    result_val.groupby(['task_name'])
    .agg(**dev_agg_spec)
    .reset_index()
    .sort_values(by=['task_name'])
)
temp_dev.to_csv(f'{root}/bert_expl_stats/bert_oc_dev_agg_seed_results.csv', index=False)
temp_dev

Unnamed: 0,task_name,acc_mean,acc_std,acc_ci,auc_roc_mean,auc_roc_std,auc_roc_ci,f1_mean,f1_std,f1_ci,...,fpr_unpriv_06_mean,fpr_unpriv_06_std,fpr_unpriv_06_ci,fpr_unpriv_08_mean,fpr_unpriv_08_std,fpr_unpriv_08_ci,priv_n_06,priv_n_08,unpriv_n_06,unpriv_n_08
0,davidson,0.964891,0.000912,"(0.9643255674367032, 0.9654565148877517)",0.987535,0.000531,"(0.98720603990394, 0.9878644677511768)",0.97861,0.000561,"(0.9782624372862306, 0.9789580984206189)",...,0.276923,0.074315,"(0.23086231609172592, 0.32298383775442796)",0.0,0.0,"(0.0, 0.0)",2000.0,2425.0,478.0,53.0
1,founta,0.938608,0.000703,"(0.9381719600949064, 0.9390439180997646)",0.971531,0.000614,"(0.9711507809760194, 0.9719114592977873)",0.888008,0.001492,"(0.8870832458020063, 0.8889327158800272)",...,0.3125,0.029463,"(0.2942387781837517, 0.3307612218162483)",0.0,0.0,"(0.0, 0.0)",9056.0,9185.0,139.0,10.0
2,golbeck,0.78783,0.002498,"(0.7862813177800356, 0.7893779114288895)",0.716557,0.002984,"(0.7147074630110294, 0.7184060497737638)",0.291718,0.024638,"(0.2764477266267267, 0.3069890625798135)",...,0.0,0.0,"(0.0, 0.0)",0.0,0.0,"(0.0, 0.0)",1964.0,1971.0,8.0,1.0
3,harassment,0.90589,0.000748,"(0.9054266121976088, 0.9063538429563406)",0.957883,0.000531,"(0.9575544396961145, 0.9582122648892007)",0.876575,0.000891,"(0.8760229889945704, 0.8771277709502489)",...,0.413953,0.032521,"(0.3937966369330339, 0.4341103398111522)",0.8,0.0,"(0.8, 0.8)",12798.0,13367.0,648.0,79.0
4,hate,0.94056,0.000893,"(0.9400064824536417, 0.9411138662427826)",0.88863,0.004276,"(0.8859801829504511, 0.8912802284975913)",0.367893,0.029,"(0.3499184885440008, 0.38586760965339)",...,0.002752,0.002484,"(0.0012124369508144555, 0.004292150205148847)",0.0,0.0,"(0.0, 0.0)",11805.0,12322.0,584.0,67.0


Unnamed: 0,dataset,task_name,acc_mean,acc_std,acc_ci,auc_roc_mean,auc_roc_std,auc_roc_ci,f1_mean,f1_std,...,fpr_unpriv_06_mean,fpr_unpriv_06_std,fpr_unpriv_06_ci,fpr_unpriv_08_mean,fpr_unpriv_08_std,fpr_unpriv_08_ci,priv_n_06,priv_n_08,unpriv_n_06,unpriv_n_08
0,davidson,glove,0.887328,0.001309,"(0.8865165472814914, 0.8881393312623623)",0.90737,0.001982,"(0.9061414733989916, 0.9085987910167492)",0.932861,0.000617,...,0.446154,0.079446,"(0.39691282158097413, 0.49539487072671834)",0.0,0.0,"(0.0, 0.0)",502.0,74.0,1968.0,2396.0
1,davidson,ngram,0.939312,0.000982,"(0.9387033864172455, 0.939920111068869)",0.977848,0.000251,"(0.9776921062772947, 0.9780029387170595)",0.963891,0.000545,...,0.392308,0.024325,"(0.37723076923076926, 0.40738461538461546)",0.0,0.0,"(0.0, 0.0)",502.0,74.0,1968.0,2396.0
2,davidson,tf_idf,0.871943,0.000333,"(0.871736729002732, 0.8721499086849511)",0.885099,0.000678,"(0.8846788297635392, 0.885519479658)",0.926375,0.00019,...,0.846154,0.0,"(0.8461538461538461, 0.8461538461538461)",0.0,0.0,"(0.0, 0.0)",502.0,74.0,1968.0,2396.0
3,founta,glove,0.899826,0.000561,"(0.8994781086922924, 0.9001732518194875)",0.935428,0.000305,"(0.9352383406918522, 0.935616776152325)",0.808424,0.002534,...,0.294118,0.0,"(0.29411764705882343, 0.29411764705882354)",0.0,0.0,"(0.0, 0.0)",117.0,7.0,9061.0,9171.0
4,founta,ngram,0.935095,0.000388,"(0.9348543425436208, 0.9353352526788524)",0.959789,0.000147,"(0.9596976988256661, 0.9598804242669853)",0.877733,0.000623,...,0.235294,0.0,"(0.2352941176470588, 0.2352941176470588)",0.0,0.0,"(0.0, 0.0)",117.0,7.0,9061.0,9171.0
5,founta,tf_idf,0.895674,0.000542,"(0.8953386787548121, 0.896010207735056)",0.914075,0.000249,"(0.9139211009213997, 0.9142291595270562)",0.787913,0.001371,...,0.294118,0.0,"(0.29411764705882343, 0.29411764705882354)",0.0,0.0,"(0.0, 0.0)",117.0,7.0,9061.0,9171.0
6,golbeck,glove,0.759383,0.000939,"(0.7588012211568581, 0.7599648244135154)",0.587727,0.004376,"(0.5850141425823744, 0.590439031531853)",0.048368,0.006537,...,0.0,0.0,"(0.0, 0.0)",,,"(nan, nan)",13.0,,1932.0,
7,golbeck,ngram,0.779177,0.000737,"(0.7787204922927863, 0.7796342550025933)",0.683468,0.000562,"(0.6831197540509362, 0.6838166554224829)",0.272135,0.005568,...,0.0,0.0,"(0.0, 0.0)",,,"(nan, nan)",13.0,,1932.0,
8,golbeck,tf_idf,0.776555,0.000434,"(0.7762865351041158, 0.7768239928881328)",0.639059,0.000607,"(0.6386833417438993, 0.6394356262660494)",0.261335,0.006933,...,0.0,0.0,"(0.0, 0.0)",,,"(nan, nan)",13.0,,1932.0,
9,harassment,glove,0.839305,0.000563,"(0.8389562937668006, 0.8396544364044031)",0.902648,0.000171,"(0.9025419199844602, 0.9027544176200626)",0.786571,0.001216,...,0.607692,0.016217,"(0.5976410256410256, 0.6177435897435897)",1.0,0.0,"(1.0, 1.0)",596.0,70.0,12850.0,13376.0


In [10]:
# Per-task summary of the test-set metrics across runs: mean, std and the
# ci_sample interval for every score, plus the median group sizes.
# Aggregations are passed by name ('mean' / 'std' / 'median') rather than as
# NumPy callables. pandas has been remapping np.mean / np.std / np.median to
# its own group-by implementations (so np.std already meant the sample std,
# ddof=1), but that remapping is deprecated; using the names keeps the numbers
# stable and makes all columns consistent.
summary_metrics = [
    'acc', 'auc_roc', 'f1', 'precision', 'recall',
    'disparate_impact_favorable_06', 'disparate_impact_favorable_08',
    'disparate_impact_unfavorable_06', 'disparate_impact_unfavorable_08',
    'fnr_priv_06', 'fnr_priv_08', 'fnr_total_06', 'fnr_total_08',
    'fnr_unpriv_06', 'fnr_unpriv_08',
    'fpr_priv_06', 'fpr_priv_08', 'fpr_total_06', 'fpr_total_08',
    'fpr_unpriv_06', 'fpr_unpriv_08',
]
group_size_columns = ['priv_n_06', 'priv_n_08', 'unpriv_n_06', 'unpriv_n_08']

# Insertion order of the dict fixes the output column order:
# <metric>_mean, <metric>_std, <metric>_ci for each metric, then the group sizes.
test_agg_spec = {}
for metric in summary_metrics:
    source_column = f'{metric} '  # source labels keep a trailing space
    test_agg_spec[f'{metric}_mean'] = (source_column, 'mean')
    test_agg_spec[f'{metric}_std'] = (source_column, 'std')
    test_agg_spec[f'{metric}_ci'] = (source_column, ci_sample)
for size_column in group_size_columns:
    test_agg_spec[size_column] = (f'{size_column} ', 'median')

temp_test = (
    result_test.groupby(['task_name'])
    .agg(**test_agg_spec)
    .reset_index()
    .sort_values(by=['task_name'])
)
temp_test.to_csv(f'{root}/bert_expl_stats/bert_oc_test_agg_seed_results.csv', index=False)
temp_test

Unnamed: 0,task_name,acc_mean,acc_std,acc_ci,auc_roc_mean,auc_roc_std,auc_roc_ci,f1_mean,f1_std,f1_ci,...,fpr_unpriv_06_mean,fpr_unpriv_06_std,fpr_unpriv_06_ci,fpr_unpriv_08_mean,fpr_unpriv_08_std,fpr_unpriv_08_ci,priv_n_06,priv_n_08,unpriv_n_06,unpriv_n_08
0,davidson,0.965954,0.001206,"(0.9652067286302982, 0.9667012988001175)",0.987516,0.000566,"(0.9871649244780194, 0.9878665764287036)",0.979387,0.000738,"(0.9789300471167732, 0.9798443682459512)",...,0.238462,0.043665,"(0.21139763327696504, 0.26552544364611186)",0.0,0.0,"(0.0, 0.0)",1977.0,2405.0,502.0,74.0
1,founta,0.942692,0.000666,"(0.9422794329993714, 0.9431055169788799)",0.971083,0.000995,"(0.9704664748981281, 0.9716997677498301)",0.894592,0.001078,"(0.8939245199526668, 0.8952602500368506)",...,0.205882,0.031003,"(0.18666666666666665, 0.22509803921568627)",0.0,0.0,"(0.0, 0.0)",9079.0,9189.0,117.0,7.0
2,golbeck,0.791988,0.004013,"(0.7895006142134495, 0.7944750450157594)",0.727011,0.00543,"(0.7236457853103337, 0.730376284747636)",0.358975,0.021594,"(0.345591184609725, 0.372359235113698)",...,0.0,0.0,"(0.0, 0.0)",,,"(nan, nan)",1959.0,,13.0,
3,harassment,0.911111,0.000572,"(0.9107568412591425, 0.9114653809630797)",0.962094,0.00046,"(0.9618088859054217, 0.9623786137347967)",0.882513,0.001172,"(0.8817863663321444, 0.8832394629391607)",...,0.392308,0.064867,"(0.3521025641025641, 0.43251282051282053)",0.5,0.333333,"(0.2933978595356659, 0.7066021404643341)",12850.0,13376.0,596.0,70.0
4,hate,0.9383,0.001849,"(0.9371541172516699, 0.9394460926119185)",0.876724,0.008419,"(0.871506052268978, 0.8819418032216243)",0.385876,0.030775,"(0.36680103211291665, 0.4049505889270881)",...,0.029032,0.013118,"(0.020901914037480984, 0.037162602091551276)",0.054098,0.019008,"(0.04231693989071039, 0.06587978142076503)",11820.0,12323.0,569.0,66.0


In [62]:
# Write the `temp` frame to CSV without the index column.
# NOTE(review): the recorded output of this cell is "NameError: name 'temp' is not defined",
# so `temp` has to be created by an earlier cell before this one runs. Later cells in this
# notebook also read `temp` — TODO confirm where it is supposed to be built.
temp.to_csv('../data/contextualize_results/stats_on_tasks.csv', index=False)

NameError: name 'temp' is not defined

In [None]:
# Maps a task name to the set of model names that are matched against the
# `model` column for that task (used with `.isin(...)` in the cells below).
# Every value is a set so the mapping has one uniform value type.
models_dict = {
    'twitter': {
        'twitter_es_reg_nb5_h5_is_bal_pos_seed_',
        'twitter_es_reg_nb0_h1_bal_seed_',
        'twitter_es_vanilla_bal_seed_',
    },
    'twitter_harass': {
        'twitter_harass_es_reg_nb5_h5_is_bal_pos_seed_',
        'twitter_harass_es_reg_nb0_h1_bal_seed_',
        'twitter_harass_es_vanilla_bal_seed_',
    },
    'gab': {
        'majority_gab_es_vanilla_bal_seed_',
        'majority_gab_es_reg_nb0_h1_bal_seed_',
        'majority_gab_es_reg_nb5_h5_is_bal_pos_seed_',
    },
    'ws': {
        'ws_es_vanilla_bal_seed_',
        'ws_es_reg_nb0_h1_bal_seed_',
        'ws_es_reg_nb5_h5_is_bal_pos_seed_',
    },
    # No models are listed for 'nyt'. A bare `{}` is an empty dict, not an
    # empty set, so spell the empty set explicitly.
    'nyt': set(),
}

In [33]:
# For every task in models_dict, keep the rows of `temp` whose `model` is listed
# under that same task and whose `task` column equals the task name.
# All per-task slices are gathered first and concatenated once, which avoids the
# None sentinel, the repeated concat inside the loop, and writing the filter twice.
task_to_learner_results = pd.concat(
    [
        temp[temp['model'].isin(task_models) & (temp['task'] == task)]
        for task, task_models in models_dict.items()
    ]
)
# Last expression of the cell: display the combined frame.
task_to_learner_results

Unnamed: 0,model,task,acc_mean,acc_std,acc_ci,auc_roc_mean,auc_roc_std,auc_roc_ci,f1_mean,f1_std,...,disparate_impact_06_mean,disparate_impact_06_std,disparate_impact_06_ci,disparate_impact_08_mean,disparate_impact_08_std,disparate_impact_08_ci,priv_n_06,priv_n_08,unpriv_n_06,unpriv_n_08
14,twitter_es_reg_nb0_h1_bal_seed_,twitter,0.740416,0.013804,"(0.7318604701540944, 0.7489718244091427)",0.851023,0.005828,"(0.8474105131000839, 0.8546345486463854)",0.400923,0.009749,...,0.857148,0.044112,"(0.8298066061381789, 0.884489005007136)",0.962671,0.097553,"(0.902206724586391, 1.023134586881783)",525.0,64.0,9904.0,10365.0
18,twitter_es_reg_nb5_h5_is_bal_pos_seed_,twitter,0.780775,0.0203,"(0.7681924281851983, 0.7933570971767733)",0.861636,0.006482,"(0.8576180444353265, 0.8656529654302911)",0.434504,0.012329,...,0.826937,0.060878,"(0.7892042012454198, 0.8646696652235802)",0.86694,0.114318,"(0.7960850410549574, 0.9377945217594896)",525.0,64.0,9904.0,10365.0
22,twitter_es_vanilla_bal_seed_,twitter,0.797047,0.016549,"(0.786789594562518, 0.8073037988596702)",0.867681,0.01204,"(0.8602187635615669, 0.8751441492098829)",0.452028,0.014638,...,0.840683,0.035621,"(0.8186045992182596, 0.8627610571687874)",0.874632,0.077094,"(0.8268481619460178, 0.9224149791085426)",525.0,64.0,9904.0,10365.0
27,twitter_harass_es_reg_nb0_h1_bal_seed_,twitter_harass,0.699126,0.10881,"(0.6316849387126151, 0.7665673015302938)",0.91396,0.096742,"(0.8539989489968628, 0.9739213422284224)",0.724786,0.059479,...,inf,,"(nan, nan)",inf,,"(nan, nan)",656.0,77.0,12847.0,13426.0
32,twitter_harass_es_reg_nb5_h5_is_bal_pos_seed_,twitter_harass,0.798674,0.010532,"(0.7921467442694817, 0.8052019930481517)",0.951445,0.00428,"(0.9487928326630026, 0.9540978517512333)",0.790119,0.007984,...,41.367705,8.509532,"(36.09344237057658, 46.64196711913195)",inf,,"(nan, nan)",656.0,77.0,12847.0,13426.0
37,twitter_harass_es_vanilla_bal_seed_,twitter_harass,0.802177,0.013758,"(0.7936501224867812, 0.8107044653825811)",0.951767,0.00501,"(0.9486616697289599, 0.9548716952451467)",0.793076,0.010919,...,35.196131,11.226862,"(28.23764979208232, 42.15461160470309)",inf,,"(nan, nan)",656.0,77.0,12847.0,13426.0
0,majority_gab_es_reg_nb0_h1_bal_seed_,gab,0.862108,0.012026,"(0.8546544856593332, 0.8695623818105465)",0.874507,0.007422,"(0.8699068385251156, 0.8791072046781914)",0.449921,0.018215,...,1.581563,0.638837,"(1.1856077090048187, 1.9775184225582472)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
4,majority_gab_es_reg_nb5_h5_is_bal_pos_seed_,gab,0.881928,0.020701,"(0.8690969473006679, 0.8947584743860789)",0.88638,0.010175,"(0.8800729619505216, 0.8926863453965002)",0.480698,0.030686,...,1.286723,0.038792,"(1.2626797696530172, 1.3107662170699763)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
8,majority_gab_es_vanilla_bal_seed_,gab,0.882831,0.015758,"(0.8730642575235151, 0.8925983930788944)",0.891738,0.007496,"(0.8870917266629993, 0.8963837414831942)",0.488232,0.021547,...,1.582046,0.701131,"(1.1474801485394046, 2.0166115835064615)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
42,ws_es_reg_nb0_h1_bal_seed_,ws,0.895282,0.012831,"(0.8873287167761869, 0.9032344491294447)",0.88033,0.009994,"(0.8741355728690714, 0.8865244461242809)",0.547665,0.02091,...,,,"(nan, nan)",,,"(nan, nan)",,,,


In [34]:
# For each learner task, keep the rows of `temp` whose `model` is listed under that
# task in models_dict and whose `task` column is one of the *other* task names.
# The learner order below matches the order the slices were originally concatenated
# in, so the row order of the result is unchanged.
all_tasks = {'twitter', 'twitter_harass', 'gab', 'ws', 'nyt'}
cross_task_frames = []
for learner_task in ('twitter', 'gab', 'twitter_harass', 'ws'):
    from_learner = temp['model'].isin(models_dict[learner_task])
    # Complement set is derived instead of hand-written once per learner.
    on_other_task = temp['task'].isin(all_tasks - {learner_task})
    cross_task_frames.append(temp[from_learner & on_other_task])
other_tasks_results = pd.concat(cross_task_frames)

In [35]:
# Display the cross-task frame assembled in the preceding cell.
other_tasks_results

Unnamed: 0,model,task,acc_mean,acc_std,acc_ci,auc_roc_mean,auc_roc_std,auc_roc_ci,f1_mean,f1_std,...,disparate_impact_06_mean,disparate_impact_06_std,disparate_impact_06_ci,disparate_impact_08_mean,disparate_impact_08_std,disparate_impact_08_ci,priv_n_06,priv_n_08,unpriv_n_06,unpriv_n_08
12,twitter_es_reg_nb0_h1_bal_seed_,gab,0.628976,0.036343,"(0.6064502124205815, 0.651501594808334)",0.818889,0.007008,"(0.8145454343478087, 0.8232331002209861)",0.276995,0.014425,...,0.856639,0.060978,"(0.8188442563563496, 0.8944327502821539)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
13,twitter_es_reg_nb0_h1_bal_seed_,nyt,0.692862,0.067703,"(0.6508996380141923, 0.7348249996669673)",0.0,0.0,"(0.0, 0.0)",0.0,0.0,...,,,"(nan, nan)",,,"(nan, nan)",,,,
15,twitter_es_reg_nb0_h1_bal_seed_,ws,0.742009,0.025167,"(0.7264106936761919, 0.7576075711639908)",0.748519,0.01486,"(0.739308099673421, 0.7577289373636157)",0.333568,0.027191,...,,,"(nan, nan)",,,"(nan, nan)",,,,
16,twitter_es_reg_nb5_h5_is_bal_pos_seed_,gab,0.697771,0.04731,"(0.6684477917724448, 0.7270943769022539)",0.821897,0.006244,"(0.8180272449425315, 0.8257673052161579)",0.310001,0.022413,...,1.219403,0.461532,"(0.9333420557721496, 1.5054630136303855)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
17,twitter_es_reg_nb5_h5_is_bal_pos_seed_,nyt,0.794993,0.068235,"(0.7527000530041628, 0.8372854542422141)",0.0,0.0,"(0.0, 0.0)",0.0,0.0,...,,,"(nan, nan)",,,"(nan, nan)",,,,
19,twitter_es_reg_nb5_h5_is_bal_pos_seed_,ws,0.771081,0.030103,"(0.7524226540514041, 0.7897386853702091)",0.750957,0.017452,"(0.740140144293877, 0.7617734359530367)",0.336322,0.026036,...,,,"(nan, nan)",,,"(nan, nan)",,,,
20,twitter_es_vanilla_bal_seed_,gab,0.766506,0.02645,"(0.7501121545689884, 0.7828998936237828)",0.856527,0.003998,"(0.8540483260302403, 0.8590049000279665)",0.365528,0.015665,...,1.142336,0.538762,"(0.8084076131365296, 1.4762634791990161)",inf,,"(nan, nan)",3.0,1.0,1657.0,1659.0
21,twitter_es_vanilla_bal_seed_,nyt,0.639087,0.088046,"(0.5845155413088085, 0.6936583717346699)",0.0,0.0,"(0.0, 0.0)",0.0,0.0,...,,,"(nan, nan)",,,"(nan, nan)",,,,
23,twitter_es_vanilla_bal_seed_,ws,0.770015,0.025544,"(0.7541827073545508, 0.7858477340457535)",0.781363,0.007915,"(0.7764569630741037, 0.7862685829847756)",0.367121,0.020248,...,,,"(nan, nan)",,,"(nan, nan)",,,,
1,majority_gab_es_reg_nb0_h1_bal_seed_,nyt,0.917362,0.024158,"(0.9023890204927413, 0.9323356171884182)",0.0,0.0,"(0.0, 0.0)",0.0,0.0,...,,,"(nan, nan)",,,"(nan, nan)",,,,


In [37]:
# Write both result frames to CSV, omitting the index column in each file.
for frame, csv_path in (
    (task_to_learner_results, '../data/contextualize_results/task_to_learner.csv'),
    (other_tasks_results, '../data/contextualize_results/other_tasks_with_learner.csv'),
):
    frame.to_csv(csv_path, index=False)
