In [12]:
import pandas as pd

In [13]:
def show_est(df, metric, plugin_metric, plugin_models, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2', f'{metric}_mixed'] + plugin_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

def show_base(df, metric, plugin_metric, plugin_models, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name'] + plugin_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

def show_all(df, metric, plugin_metrics, plugin_models, matching_ks, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for plugin_metric in plugin_metrics for pm in plugin_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for plugin_metric in plugin_metrics for k in matching_ks]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2'] + plugin_cols + matching_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

def show_all_jobs(df, metric, plugin_metrics, plugin_models, matching_ks, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for plugin_metric in plugin_metrics for pm in plugin_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for plugin_metric in plugin_metrics for k in matching_ks]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2', f'{metric}_pol'] + plugin_cols + matching_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

In [14]:
def my_filter(x, metric):
    if x[f'{metric}_mse'] != '-':
        return x[f'{metric}_mse']
    elif x[f'{metric}_mixed'] != '-':
        return x[f'{metric}_mixed']
    else:
        return x[f'{metric}_rs_lgbm']

def ate_filter(x):
    return my_filter(x, 'ate')

def pehe_filter(x):
    return my_filter(x, 'pehe')

def att_filter(x):
    return my_filter(x, 'att')

def policy_filter(x):
    return my_filter(x, 'policy')

## IHDP

In [4]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm', 'cb']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'ihdp'
avg_metric = 'ate'
ite_metric = 'pehe'

### Perspective -- causal estimators

In [6]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_est(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pehe = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
  name &   selection_ate &        ate_test &  selection_pehe &       pehe_test \\
\midrule
    SL & $0.246\pm0.098$ & $0.001\pm0.001$ & $1.373\pm0.612$ & $1.205\pm0.561$ \\
    TL & $0.168\pm0.098$ & $0.000\pm0.000$ & $0.701\pm0.202$ & $0.621\pm0.200$ \\
 IPSWS & $0.131\pm0.034$ & $0.001\pm0.000$ & $1.552\pm0.726$ & $1.204\pm0.560$ \\
   DRS & $0.211\pm0.079$ & $0.002\pm0.001$ & $1.470\pm0.624$ & $1.275\pm0.581$ \\
  DMLS & $0.438\pm0.136$ & $0.007\pm0.003$ & $1.905\pm0.826$ & $1.679\pm0.830$ \\
    XL & $0.270\pm0.135$ & $0.009\pm0.007$ & $1.276\pm0.448$ & $1.067\pm0.409$ \\
    CF & $0.240\pm0.129$ & $0.198\pm0.131$ & $2.295\pm1.185$ & $2.290\pm1.186$ \\
SL-MLP & $0.486\pm0.128$ & $0.104\pm0.038$ & $1.064\pm0.212$ & $0.925\pm0.224$ \\
TL-MLP & $0.221\pm0.076$ & $0.000\pm0.000$ & $0.894\pm0.183$ & $0.641\pm0.190$ \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)


### Perspective -- base learners

In [7]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_base(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pehe = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_ate['selection'] = df_ate['ate_rs_lgbm']
df_pehe['selection'] = df_pehe['pehe_rs_lgbm']

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
  name &   selection_ate &        ate_test &  selection_pehe &       pehe_test \\
\midrule
    L1 & $0.340\pm0.129$ & $0.046\pm0.016$ & $1.795\pm0.881$ & $1.643\pm0.884$ \\
    L2 & $0.270\pm0.142$ & $0.161\pm0.112$ & $1.810\pm0.899$ & $1.603\pm0.841$ \\
    DT & $0.399\pm0.202$ & $0.000\pm0.000$ & $2.330\pm1.028$ & $1.890\pm0.932$ \\
    RF & $0.253\pm0.095$ & $0.020\pm0.011$ & $1.909\pm0.968$ & $1.529\pm0.811$ \\
    ET & $0.292\pm0.129$ & $0.003\pm0.002$ & $1.999\pm1.107$ & $1.582\pm0.915$ \\
    KR & $0.282\pm0.104$ & $0.001\pm0.001$ & $1.399\pm0.623$ & $0.653\pm0.195$ \\
    CB & $0.267\pm0.064$ & $0.004\pm0.002$ & $1.453\pm0.700$ & $0.893\pm0.386$ \\
  LGBM & $0.264\pm0.080$ & $0.016\pm0.008$ & $1.881\pm0.907$ & $1.326\pm0.562$ \\
    CF & $0.240\pm0.129$ & $0.198\pm0.131$ & $2.295\pm1.185$ & $2.290\pm1.186$ \\
SL-MLP & $0.342\pm0.067$ & $0.104\pm0.038$ & $1.232\pm0.277$ & $0.925\pm0.224$ \\
TL-MLP & $0.507\pm0.208$ & $0.000\pm0.000$ & $1.399\pm0.4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate['ate_rs_lgbm']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe['pehe_rs_lgbm']


### Perspective -- model selection

In [5]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_sem_latex.csv')

df_ate = show_all(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_ate', '_pehe']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_ate', 'all_pehe']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_ate &        all_pehe \\
\midrule
          MSE & $0.188\pm0.097$ & $0.786\pm0.189$ \\
           R2 & $0.352\pm0.146$ & $0.922\pm0.237$ \\
    SL-DT-ATE & $1.455\pm0.848$ & $3.327\pm1.454$ \\
  SL-LGBM-ATE & $4.791\pm0.904$ & $6.998\pm2.261$ \\
    SL-KR-ATE & $1.430\pm0.608$ & $2.868\pm0.978$ \\
    TL-DT-ATE & $0.201\pm0.057$ & $1.873\pm0.393$ \\
  TL-LGBM-ATE & $0.237\pm0.082$ & $1.983\pm0.434$ \\
    TL-KR-ATE & $0.409\pm0.115$ & $1.796\pm0.690$ \\
   SL-DT-PEHE & $1.655\pm1.269$ & $4.188\pm2.652$ \\
 SL-LGBM-PEHE & $4.791\pm0.904$ & $6.998\pm2.261$ \\
   SL-KR-PEHE & $1.652\pm0.788$ & $4.438\pm2.248$ \\
   TL-DT-PEHE & $0.267\pm0.125$ & $2.605\pm1.433$ \\
 TL-LGBM-PEHE & $0.306\pm0.173$ & $2.357\pm1.014$ \\
   TL-KR-PEHE & $0.209\pm0.048$ & $1.341\pm0.518$ \\
 MATCH-1K-ATE & $0.323\pm0.098$ & $3.199\pm1.585$ \\
 MATCH-3K-ATE & $0.176\pm0.039$ & $1.640\pm0.352$ \\
 MATCH-5K-ATE & $0.209\pm0.065$ & $1.841\pm0.478$ \\
MATCH-1

## JOBS

In [15]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'jobs'
avg_metric = 'att'
ite_metric = 'policy'

### Perspective -- causal estimators

In [9]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_est(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pol = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_att['selection'] = df_att.apply(att_filter, axis=1)
df_pol['selection'] = df_pol.apply(policy_filter, axis=1)

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
  name &   selection_att &        att_test &   selection_pol &     policy_test \\
\midrule
    SL & $0.066\pm0.020$ & $0.003\pm0.001$ & $0.261\pm0.019$ & $0.158\pm0.011$ \\
    TL & $0.074\pm0.023$ & $0.000\pm0.000$ & $0.235\pm0.019$ & $0.128\pm0.012$ \\
 IPSWS & $0.077\pm0.025$ & $0.024\pm0.019$ & $0.245\pm0.013$ & $0.158\pm0.013$ \\
   DRS & $0.075\pm0.023$ & $0.001\pm0.001$ & $0.240\pm0.010$ & $0.149\pm0.010$ \\
  DMLS & $0.078\pm0.023$ & $0.016\pm0.005$ & $0.264\pm0.014$ & $0.193\pm0.012$ \\
    XL & $0.077\pm0.025$ & $0.003\pm0.002$ & $0.233\pm0.019$ & $0.153\pm0.013$ \\
    CF & $0.072\pm0.021$ & $0.053\pm0.022$ & $0.225\pm0.016$ & $0.204\pm0.017$ \\
SL-MLP & $0.068\pm0.022$ & $0.023\pm0.019$ & $0.254\pm0.018$ & $0.162\pm0.010$ \\
TL-MLP & $0.066\pm0.026$ & $0.010\pm0.010$ & $0.226\pm0.014$ & $0.132\pm0.009$ \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_att['selection'] = df_att.apply(att_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pol['selection'] = df_pol.apply(policy_filter, axis=1)


### Perspective -- base learners

In [10]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_base(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pol = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_att['selection'] = df_att['att_rs_lgbm']
df_pol['selection'] = df_pol['policy_rs_lgbm']

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
  name &   selection_att &        att_test &   selection_pol &     policy_test \\
\midrule
    L1 & $0.093\pm0.024$ & $0.032\pm0.022$ & $0.270\pm0.024$ & $0.197\pm0.013$ \\
    L2 & $0.082\pm0.025$ & $0.061\pm0.023$ & $0.224\pm0.019$ & $0.199\pm0.014$ \\
    DT & $0.062\pm0.019$ & $0.002\pm0.001$ & $0.233\pm0.019$ & $0.142\pm0.010$ \\
    RF & $0.074\pm0.021$ & $0.011\pm0.006$ & $0.239\pm0.017$ & $0.155\pm0.015$ \\
    ET & $0.073\pm0.023$ & $0.014\pm0.008$ & $0.223\pm0.016$ & $0.148\pm0.013$ \\
    KR & $0.085\pm0.026$ & $0.000\pm0.000$ & $0.262\pm0.024$ & $0.141\pm0.009$ \\
    CB & $0.066\pm0.021$ & $0.025\pm0.011$ & $0.212\pm0.015$ & $0.156\pm0.011$ \\
  LGBM & $0.076\pm0.024$ & $0.009\pm0.003$ & $0.221\pm0.015$ & $0.174\pm0.011$ \\
    CF & $0.072\pm0.021$ & $0.053\pm0.022$ & $0.225\pm0.016$ & $0.204\pm0.017$ \\
SL-MLP & $0.075\pm0.025$ & $0.023\pm0.019$ & $0.251\pm0.014$ & $0.162\pm0.010$ \\
TL-MLP & $0.088\pm0.025$ & $0.010\pm0.010$ & $0.216\pm0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_att['selection'] = df_att['att_rs_lgbm']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pol['selection'] = df_pol['policy_rs_lgbm']


### Perspective -- model selection

In [16]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_sem_latex.csv')

df_ate = show_all_jobs(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all_jobs(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2', f'{avg_metric}_pol': 'pol'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2', f'{ite_metric}_pol': 'pol'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_att', '_pol']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_att', 'all_pol']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_att &         all_pol \\
\midrule
          MSE & $0.077\pm0.025$ & $0.245\pm0.013$ \\
           R2 & $0.072\pm0.022$ & $0.257\pm0.019$ \\
          POL & $0.300\pm0.090$ & $0.220\pm0.016$ \\
    SL-DT-ATE & $0.080\pm0.026$ & $0.275\pm0.024$ \\
  SL-LGBM-ATE & $0.075\pm0.025$ & $0.267\pm0.015$ \\
    SL-KR-ATE & $0.068\pm0.022$ & $0.233\pm0.023$ \\
    TL-DT-ATE & $0.086\pm0.026$ & $0.245\pm0.017$ \\
  TL-LGBM-ATE & $0.081\pm0.027$ & $0.250\pm0.016$ \\
    TL-KR-ATE & $0.066\pm0.013$ & $0.222\pm0.010$ \\
   SL-DT-PEHE & $0.083\pm0.026$ & $0.297\pm0.020$ \\
 SL-LGBM-PEHE & $0.083\pm0.026$ & $0.296\pm0.020$ \\
   SL-KR-PEHE & $0.076\pm0.024$ & $0.248\pm0.018$ \\
   TL-DT-PEHE & $0.065\pm0.022$ & $0.248\pm0.016$ \\
 TL-LGBM-PEHE & $0.073\pm0.022$ & $0.247\pm0.015$ \\
   TL-KR-PEHE & $0.085\pm0.023$ & $0.244\pm0.015$ \\
 MATCH-1K-ATE & $0.161\pm0.076$ & $0.229\pm0.018$ \\
 MATCH-3K-ATE & $0.158\pm0.077$ & $0.227\pm0.014$ \\
 MATCH-