In [1]:
import pandas as pd

In [6]:
def show_est(df, metric, plugin_metric, plugin_models, rscore_models, matching_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for k in matching_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2', f'{metric}_mixed'] + plugin_cols + rscore_cols + matching_cols + [f'{metric}_test']
    return df[cols]

def show_base(df, metric, plugin_metric, plugin_models, rscore_models, matching_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for k in matching_models]
    cols = ['name'] + plugin_cols + rscore_cols + matching_cols + [f'{metric}_test']
    return df[cols]

def show_all(df, metric, plugin_metrics, plugin_models, matching_ks, rscore_models, ensemble_types):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for plugin_metric in plugin_metrics for pm in plugin_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for plugin_metric in plugin_metrics for k in matching_ks]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    ensemble_cols = [f'{metric}_ensemble_{en_type}' for en_type in ensemble_types]
    cols = ['name', f'{metric}_mse', f'{metric}_r2'] + plugin_cols + matching_cols + rscore_cols + ensemble_cols + [f'{metric}_test']
    return df[cols]

def show_all_jobs(df, metric, plugin_metrics, plugin_models, matching_ks, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for plugin_metric in plugin_metrics for pm in plugin_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for plugin_metric in plugin_metrics for k in matching_ks]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2', f'{metric}_pol'] + plugin_cols + matching_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

In [3]:
def my_filter(x, metric):
    if x[f'{metric}_mse'] != '-':
        return x[f'{metric}_mse']
    elif x[f'{metric}_mixed'] != '-':
        return x[f'{metric}_mixed']
    else:
        #return x[f'{metric}_rs_lgbm']
        return '-'

def ate_filter(x):
    return my_filter(x, 'ate')

def pehe_filter(x):
    return my_filter(x, 'pehe')

def att_filter(x):
    return my_filter(x, 'att')

def policy_filter(x):
    return my_filter(x, 'policy')

## IHDP

In [7]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm', 'cb']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']
ensemble_types = ['all', 'ate', 'pehe']

ds = 'ihdp'
avg_metric = 'ate'
ite_metric = 'pehe'

### Perspective -- causal estimators

In [7]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_est(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pehe = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])

#print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))
#print(df_merged.columns)
print(df_merged[['name', 'selection_ate', 'ate_r2', 'ate_tl_kr_pehe', 'ate_match_1k_pehe', 'ate_rs_lgbm', 'ate_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllllll}
\toprule
  name &   selection_ate &          ate_r2 &  ate_tl_kr_pehe & ate_match_1k_pehe &     ate_rs_lgbm &        ate_test \\
\midrule
    SL & $0.246\pm0.098$ & $0.184\pm0.052$ & $0.264\pm0.060$ &   $0.223\pm0.094$ & $0.318\pm0.062$ & $0.001\pm0.001$ \\
    TL & $0.168\pm0.098$ & $0.180\pm0.096$ & $0.151\pm0.045$ &   $0.143\pm0.070$ & $0.202\pm0.068$ & $0.000\pm0.000$ \\
 IPSWS & $0.131\pm0.034$ & $0.261\pm0.093$ & $0.239\pm0.058$ &   $0.215\pm0.094$ & $0.315\pm0.086$ & $0.001\pm0.000$ \\
   DRS & $0.211\pm0.079$ &               - & $0.182\pm0.041$ &   $0.163\pm0.064$ & $0.533\pm0.350$ & $0.002\pm0.001$ \\
  DMLS & $0.438\pm0.136$ &               - & $0.290\pm0.047$ &   $0.282\pm0.117$ & $0.508\pm0.181$ & $0.007\pm0.003$ \\
    XL &               - &               - & $0.234\pm0.085$ &   $0.275\pm0.132$ & $0.270\pm0.135$ & $0.009\pm0.007$ \\
    CF &               - &               - & $0.241\pm0.129$ &   $0.241\pm0.129$ & $0.240\pm0.129$ & $0.198\pm0.131$ 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)


In [8]:
print(df_merged[['name', 'selection_pehe', 'pehe_r2', 'pehe_tl_kr_pehe', 'pehe_match_1k_pehe', 'pehe_rs_lgbm', 'pehe_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllllll}
\toprule
  name &  selection_pehe &         pehe_r2 & pehe_tl_kr_pehe & pehe_match_1k_pehe &    pehe_rs_lgbm &       pehe_test \\
\midrule
    SL & $1.373\pm0.612$ & $1.390\pm0.644$ & $1.549\pm0.673$ &    $1.296\pm0.618$ & $1.548\pm0.697$ & $1.205\pm0.561$ \\
    TL & $0.701\pm0.202$ & $0.724\pm0.199$ & $1.214\pm0.536$ &    $0.747\pm0.239$ & $1.129\pm0.379$ & $0.621\pm0.200$ \\
 IPSWS & $1.552\pm0.726$ & $2.117\pm0.932$ & $1.517\pm0.647$ &    $1.309\pm0.616$ & $1.396\pm0.609$ & $1.204\pm0.560$ \\
   DRS & $1.470\pm0.624$ &               - & $1.688\pm0.794$ &    $1.508\pm0.755$ & $1.658\pm0.793$ & $1.275\pm0.581$ \\
  DMLS & $1.905\pm0.826$ &               - & $1.890\pm0.898$ &    $1.827\pm0.893$ & $2.005\pm0.880$ & $1.679\pm0.830$ \\
    XL &               - &               - & $1.317\pm0.498$ &    $1.154\pm0.441$ & $1.276\pm0.448$ & $1.067\pm0.409$ \\
    CF &               - &               - & $2.300\pm1.185$ &    $2.295\pm1.185$ & $2.295\pm1.185$ & $2.290\p

### Perspective -- base learners

In [5]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_base(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pehe = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

#df_ate['selection'] = df_ate['ate_rs_lgbm']
#df_pehe['selection'] = df_pehe['pehe_rs_lgbm']

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
#print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))

print(df_merged[['name', 'ate_tl_kr_pehe', 'ate_match_1k_pehe', 'ate_rs_lgbm', 'ate_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
name &  ate_tl_kr_pehe & ate_match_1k_pehe &     ate_rs_lgbm &        ate_test \\
\midrule
  L1 & $0.277\pm0.100$ &   $0.226\pm0.100$ & $0.340\pm0.129$ & $0.046\pm0.016$ \\
  L2 & $0.330\pm0.179$ &   $0.215\pm0.116$ & $0.270\pm0.142$ & $0.161\pm0.112$ \\
  DT & $0.620\pm0.201$ &   $0.451\pm0.293$ & $0.399\pm0.202$ & $0.000\pm0.000$ \\
  RF & $0.204\pm0.054$ &   $0.109\pm0.043$ & $0.253\pm0.095$ & $0.020\pm0.011$ \\
  ET & $0.220\pm0.107$ &   $0.281\pm0.191$ & $0.292\pm0.129$ & $0.003\pm0.002$ \\
  KR & $0.190\pm0.042$ &   $0.137\pm0.072$ & $0.282\pm0.104$ & $0.001\pm0.001$ \\
  CB & $0.242\pm0.061$ &   $0.109\pm0.019$ & $0.267\pm0.064$ & $0.004\pm0.002$ \\
LGBM & $0.351\pm0.066$ &   $0.183\pm0.069$ & $0.264\pm0.080$ & $0.016\pm0.008$ \\
 MLP & $0.415\pm0.139$ &   $0.379\pm0.165$ & $0.507\pm0.208$ & $0.000\pm0.000$ \\
\bottomrule
\end{tabular}



In [6]:
print(df_merged[['name', 'pehe_tl_kr_pehe', 'pehe_match_1k_pehe', 'pehe_rs_lgbm', 'pehe_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
name & pehe_tl_kr_pehe & pehe_match_1k_pehe &    pehe_rs_lgbm &       pehe_test \\
\midrule
  L1 & $1.820\pm0.887$ &    $1.698\pm0.904$ & $1.795\pm0.881$ & $1.643\pm0.884$ \\
  L2 & $1.738\pm0.859$ &    $1.617\pm0.840$ & $1.810\pm0.899$ & $1.603\pm0.841$ \\
  DT & $2.294\pm0.978$ &    $2.189\pm1.037$ & $2.330\pm1.028$ & $1.890\pm0.932$ \\
  RF & $1.816\pm0.818$ &    $1.718\pm0.822$ & $1.909\pm0.968$ & $1.529\pm0.811$ \\
  ET & $1.955\pm1.026$ &    $1.788\pm0.999$ & $1.999\pm1.107$ & $1.582\pm0.915$ \\
  KR & $1.306\pm0.523$ &    $0.746\pm0.239$ & $1.399\pm0.623$ & $0.653\pm0.195$ \\
  CB & $1.428\pm0.684$ &    $0.972\pm0.410$ & $1.453\pm0.700$ & $0.893\pm0.386$ \\
LGBM & $1.976\pm0.939$ &    $1.418\pm0.599$ & $1.881\pm0.907$ & $1.326\pm0.562$ \\
 MLP & $1.531\pm0.685$ &    $1.179\pm0.459$ & $1.399\pm0.496$ & $0.641\pm0.190$ \\
\bottomrule
\end{tabular}



In [7]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex_nn.csv')

print(df_all.to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
name &        ate_test &       pehe_test \\
\midrule
 mlp & $0.000\pm0.000$ & $0.641\pm0.190$ \\
\bottomrule
\end{tabular}



### Perspective -- model selection

In [8]:
#df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_sem_latex.csv')
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all.csv')

df_ate = show_all(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models, ensemble_types)
df_pehe = show_all(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models, ensemble_types)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models] + [f'ensemble_{en_type}' for en_type in ensemble_types]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_ate', '_pehe']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_ate', 'all_pehe']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_ate &        all_pehe \\
\midrule
          MSE & $0.188\pm0.097$ & $0.786\pm0.189$ \\
           R2 & $0.352\pm0.146$ & $0.922\pm0.237$ \\
    SL-DT-ATE & $1.455\pm0.848$ & $3.327\pm1.454$ \\
  SL-LGBM-ATE & $4.791\pm0.904$ & $6.998\pm2.261$ \\
    SL-KR-ATE & $1.430\pm0.608$ & $2.868\pm0.978$ \\
    TL-DT-ATE & $0.201\pm0.057$ & $1.873\pm0.393$ \\
  TL-LGBM-ATE & $0.237\pm0.082$ & $1.983\pm0.434$ \\
    TL-KR-ATE & $0.409\pm0.115$ & $1.796\pm0.690$ \\
   SL-DT-PEHE & $1.655\pm1.269$ & $4.188\pm2.652$ \\
 SL-LGBM-PEHE & $4.791\pm0.904$ & $6.998\pm2.261$ \\
   SL-KR-PEHE & $1.652\pm0.788$ & $4.438\pm2.248$ \\
   TL-DT-PEHE & $0.267\pm0.125$ & $2.605\pm1.433$ \\
 TL-LGBM-PEHE & $0.306\pm0.173$ & $2.357\pm1.014$ \\
   TL-KR-PEHE & $0.209\pm0.048$ & $1.341\pm0.518$ \\
 MATCH-1K-ATE & $0.323\pm0.098$ & $3.199\pm1.585$ \\
 MATCH-3K-ATE & $0.176\pm0.039$ & $1.640\pm0.352$ \\
 MATCH-5K-ATE & $0.209\pm0.065$ & $1.841\pm0.478$ \\
MATCH-1

## JOBS

In [7]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'jobs'
avg_metric = 'att'
ite_metric = 'policy'

### Perspective -- causal estimators

In [21]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_est(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pol = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

df_att['selection'] = df_att.apply(att_filter, axis=1)
df_pol['selection'] = df_pol.apply(policy_filter, axis=1)

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

#print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))
#print(df_merged.columns)
print(df_merged[['name', 'selection_att', 'att_r2', 'att_tl_kr_pehe', 'att_match_1k_pehe', 'att_rs_lgbm', 'att_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllllll}
\toprule
  name &   selection_att &          att_r2 &  att_tl_kr_pehe & att_match_1k_pehe &     att_rs_lgbm &        att_test \\
\midrule
    SL & $0.066\pm0.020$ & $0.076\pm0.020$ & $0.086\pm0.030$ &   $0.071\pm0.022$ & $0.076\pm0.023$ & $0.003\pm0.001$ \\
    TL & $0.074\pm0.023$ & $0.077\pm0.022$ & $0.083\pm0.023$ &   $0.080\pm0.023$ & $0.081\pm0.025$ & $0.000\pm0.000$ \\
 IPSWS & $0.077\pm0.025$ & $0.080\pm0.024$ & $0.069\pm0.022$ &   $0.079\pm0.017$ & $0.079\pm0.020$ & $0.024\pm0.019$ \\
   DRS & $0.075\pm0.023$ &               - & $0.123\pm0.037$ &   $0.080\pm0.023$ & $0.064\pm0.019$ & $0.001\pm0.001$ \\
  DMLS & $0.078\pm0.023$ &               - & $0.083\pm0.025$ &   $0.079\pm0.025$ & $0.107\pm0.031$ & $0.016\pm0.005$ \\
    XL &               - &               - & $0.092\pm0.036$ &   $0.078\pm0.026$ & $0.077\pm0.025$ & $0.003\pm0.002$ \\
    CF &               - &               - & $0.074\pm0.021$ &   $0.077\pm0.023$ & $0.072\pm0.021$ & $0.053\pm0.022$ 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_att['selection'] = df_att.apply(att_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pol['selection'] = df_pol.apply(policy_filter, axis=1)


In [26]:
print(df_merged[['name', 'selection_pol', 'policy_r2', 'policy_tl_kr_pehe', 'policy_match_1k_pehe', 'policy_rs_lgbm', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllllll}
\toprule
  name &   selection_pol &       policy_r2 & policy_tl_kr_pehe & policy_match_1k_pehe &  policy_rs_lgbm &     policy_test \\
\midrule
    SL & $0.261\pm0.019$ & $0.262\pm0.021$ &   $0.236\pm0.018$ &      $0.250\pm0.019$ & $0.211\pm0.012$ & $0.158\pm0.011$ \\
    TL & $0.235\pm0.019$ & $0.237\pm0.018$ &   $0.241\pm0.015$ &      $0.245\pm0.013$ & $0.241\pm0.016$ & $0.128\pm0.012$ \\
 IPSWS & $0.245\pm0.013$ & $0.245\pm0.011$ &   $0.245\pm0.024$ &      $0.249\pm0.021$ & $0.248\pm0.019$ & $0.158\pm0.013$ \\
   DRS & $0.240\pm0.010$ &               - &   $0.282\pm0.021$ &      $0.249\pm0.012$ & $0.243\pm0.015$ & $0.149\pm0.010$ \\
  DMLS & $0.264\pm0.014$ &               - &   $0.263\pm0.024$ &      $0.249\pm0.015$ & $0.271\pm0.021$ & $0.193\pm0.012$ \\
    XL &               - &               - &   $0.256\pm0.025$ &      $0.236\pm0.013$ & $0.233\pm0.019$ & $0.153\pm0.013$ \\
    CF &               - &               - &   $0.254\pm0.018$ &      $0.246\pm0.0

### Perspective -- base learners

In [8]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_base(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pol = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

#df_att['selection'] = df_att['att_rs_lgbm']
#df_pol['selection'] = df_pol['policy_rs_lgbm']

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

#print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))
print(df_merged[['name', 'att_tl_kr_pehe', 'att_match_1k_pehe', 'att_rs_lgbm', 'att_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
name &  att_tl_kr_pehe & att_match_1k_pehe &     att_rs_lgbm &        att_test \\
\midrule
  L1 & $0.078\pm0.024$ &   $0.075\pm0.025$ & $0.093\pm0.024$ & $0.032\pm0.022$ \\
  L2 & $0.077\pm0.026$ &   $0.079\pm0.025$ & $0.082\pm0.025$ & $0.061\pm0.023$ \\
  DT & $0.076\pm0.022$ &   $0.069\pm0.021$ & $0.062\pm0.019$ & $0.002\pm0.001$ \\
  RF & $0.073\pm0.024$ &   $0.074\pm0.022$ & $0.074\pm0.021$ & $0.011\pm0.006$ \\
  ET & $0.082\pm0.025$ &   $0.077\pm0.026$ & $0.073\pm0.023$ & $0.014\pm0.008$ \\
  KR & $0.080\pm0.023$ &   $0.080\pm0.022$ & $0.085\pm0.026$ & $0.000\pm0.000$ \\
  CB & $0.077\pm0.023$ &   $0.073\pm0.020$ & $0.066\pm0.021$ & $0.025\pm0.011$ \\
LGBM & $0.076\pm0.022$ &   $0.073\pm0.023$ & $0.076\pm0.024$ & $0.009\pm0.003$ \\
 MLP & $0.088\pm0.022$ &   $0.085\pm0.028$ & $0.088\pm0.025$ & $0.010\pm0.010$ \\
\bottomrule
\end{tabular}



In [9]:
print(df_merged[['name', 'policy_tl_kr_pehe', 'policy_match_1k_pehe', 'policy_rs_lgbm', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
name & policy_tl_kr_pehe & policy_match_1k_pehe &  policy_rs_lgbm &     policy_test \\
\midrule
  L1 &   $0.246\pm0.014$ &      $0.243\pm0.014$ & $0.270\pm0.024$ & $0.197\pm0.013$ \\
  L2 &   $0.245\pm0.019$ &      $0.255\pm0.014$ & $0.224\pm0.019$ & $0.199\pm0.014$ \\
  DT &   $0.270\pm0.025$ &      $0.230\pm0.014$ & $0.233\pm0.019$ & $0.142\pm0.010$ \\
  RF &   $0.253\pm0.017$ &      $0.241\pm0.015$ & $0.239\pm0.017$ & $0.155\pm0.015$ \\
  ET &   $0.244\pm0.015$ &      $0.232\pm0.012$ & $0.223\pm0.016$ & $0.148\pm0.013$ \\
  KR &   $0.235\pm0.016$ &      $0.254\pm0.016$ & $0.262\pm0.024$ & $0.141\pm0.009$ \\
  CB &   $0.222\pm0.019$ &      $0.211\pm0.016$ & $0.212\pm0.015$ & $0.156\pm0.011$ \\
LGBM &   $0.220\pm0.016$ &      $0.204\pm0.014$ & $0.221\pm0.015$ & $0.174\pm0.011$ \\
 MLP &   $0.242\pm0.015$ &      $0.247\pm0.014$ & $0.216\pm0.015$ & $0.131\pm0.009$ \\
\bottomrule
\end{tabular}



In [9]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex_nn.csv')

print(df_all.to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
name &        att_test &     policy_test \\
\midrule
 mlp & $0.010\pm0.010$ & $0.131\pm0.009$ \\
\bottomrule
\end{tabular}



### Perspective -- model selection

In [16]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_sem_latex.csv')

df_ate = show_all_jobs(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all_jobs(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2', f'{avg_metric}_pol': 'pol'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2', f'{ite_metric}_pol': 'pol'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_att', '_pol']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_att', 'all_pol']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_att &         all_pol \\
\midrule
          MSE & $0.077\pm0.025$ & $0.245\pm0.013$ \\
           R2 & $0.072\pm0.022$ & $0.257\pm0.019$ \\
          POL & $0.300\pm0.090$ & $0.220\pm0.016$ \\
    SL-DT-ATE & $0.080\pm0.026$ & $0.275\pm0.024$ \\
  SL-LGBM-ATE & $0.075\pm0.025$ & $0.267\pm0.015$ \\
    SL-KR-ATE & $0.068\pm0.022$ & $0.233\pm0.023$ \\
    TL-DT-ATE & $0.086\pm0.026$ & $0.245\pm0.017$ \\
  TL-LGBM-ATE & $0.081\pm0.027$ & $0.250\pm0.016$ \\
    TL-KR-ATE & $0.066\pm0.013$ & $0.222\pm0.010$ \\
   SL-DT-PEHE & $0.083\pm0.026$ & $0.297\pm0.020$ \\
 SL-LGBM-PEHE & $0.083\pm0.026$ & $0.296\pm0.020$ \\
   SL-KR-PEHE & $0.076\pm0.024$ & $0.248\pm0.018$ \\
   TL-DT-PEHE & $0.065\pm0.022$ & $0.248\pm0.016$ \\
 TL-LGBM-PEHE & $0.073\pm0.022$ & $0.247\pm0.015$ \\
   TL-KR-PEHE & $0.085\pm0.023$ & $0.244\pm0.015$ \\
 MATCH-1K-ATE & $0.161\pm0.076$ & $0.229\pm0.018$ \\
 MATCH-3K-ATE & $0.158\pm0.077$ & $0.227\pm0.014$ \\
 MATCH-

## TWINS

In [10]:
plugin_meta_models = ['sl', 'tl']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'twins'
avg_metric = 'ate'
ite_metric = 'pehe'

### Perspective -- causal estimators

In [11]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_est(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pehe = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
#print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))

print(df_merged[['name', 'selection_ate', 'ate_r2', 'ate_tl_kr_pehe', 'ate_match_1k_pehe', 'ate_rs_lgbm', 'ate_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllllll}
\toprule
  name &   selection_ate &          ate_r2 &  ate_tl_kr_pehe & ate_match_1k_pehe &     ate_rs_lgbm &        ate_test \\
\midrule
    SL & $0.039\pm0.000$ & $0.039\pm0.000$ & $0.047\pm0.001$ &   $0.047\pm0.001$ & $0.027\pm0.002$ & $0.000\pm0.000$ \\
    TL & $0.051\pm0.000$ & $0.051\pm0.000$ & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.077\pm0.000$ & $0.000\pm0.000$ \\
 IPSWS & $0.040\pm0.000$ & $0.040\pm0.000$ & $0.045\pm0.001$ &   $0.045\pm0.001$ & $0.023\pm0.002$ & $0.000\pm0.000$ \\
   DRS & $0.053\pm0.001$ &               - & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.063\pm0.001$ & $0.001\pm0.000$ \\
  DMLS & $0.033\pm0.001$ &               - & $0.043\pm0.001$ &   $0.060\pm0.003$ & $0.031\pm0.000$ & $0.001\pm0.000$ \\
    XL &               - &               - & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.052\pm0.001$ & $0.013\pm0.001$ \\
    CF &               - &               - & $0.068\pm0.000$ &   $0.064\pm0.001$ & $0.064\pm0.000$ & $0.063\pm0.000$ 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)


In [12]:
print(df_merged[['name', 'selection_pehe', 'pehe_r2', 'pehe_tl_kr_pehe', 'pehe_match_1k_pehe', 'pehe_rs_lgbm', 'pehe_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllllll}
\toprule
  name &  selection_pehe &         pehe_r2 & pehe_tl_kr_pehe & pehe_match_1k_pehe &    pehe_rs_lgbm &       pehe_test \\
\midrule
    SL & $0.319\pm0.002$ & $0.319\pm0.002$ & $0.320\pm0.002$ &    $0.320\pm0.002$ & $0.319\pm0.002$ & $0.317\pm0.002$ \\
    TL & $0.328\pm0.003$ & $0.328\pm0.003$ & $0.326\pm0.002$ &    $0.326\pm0.002$ & $0.326\pm0.002$ & $0.318\pm0.002$ \\
 IPSWS & $0.320\pm0.002$ & $0.320\pm0.002$ & $0.320\pm0.002$ &    $0.320\pm0.002$ & $0.318\pm0.002$ & $0.317\pm0.002$ \\
   DRS & $0.330\pm0.003$ &               - & $0.326\pm0.002$ &    $0.326\pm0.002$ & $0.323\pm0.002$ & $0.318\pm0.002$ \\
  DMLS & $0.318\pm0.002$ &               - & $0.320\pm0.002$ &    $0.323\pm0.001$ & $0.318\pm0.002$ & $0.317\pm0.002$ \\
    XL &               - &               - & $0.326\pm0.002$ &    $0.326\pm0.002$ & $0.323\pm0.003$ & $0.318\pm0.002$ \\
    CF &               - &               - & $0.324\pm0.002$ &    $0.323\pm0.002$ & $0.323\pm0.002$ & $0.323\p

In [17]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_est(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
#df_pehe = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_ate['mse_mix'] = df_ate.apply(ate_filter, axis=1)

print(df_ate[['name', 'mse_mix', 'ate_tl_kr_pehe', 'ate_match_1k_pehe', 'ate_rs_lgbm', 'ate_test']].to_latex(index=False, escape=False))

#print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))


\begin{tabular}{llllll}
\toprule
  name &         mse_mix &  ate_tl_kr_pehe & ate_match_1k_pehe &     ate_rs_lgbm &        ate_test \\
\midrule
    SL & $0.039\pm0.000$ & $0.047\pm0.001$ &   $0.047\pm0.001$ & $0.027\pm0.002$ & $0.000\pm0.000$ \\
    TL & $0.051\pm0.000$ & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.077\pm0.000$ & $0.000\pm0.000$ \\
 IPSWS & $0.040\pm0.000$ & $0.045\pm0.001$ &   $0.045\pm0.001$ & $0.023\pm0.002$ & $0.000\pm0.000$ \\
   DRS & $0.053\pm0.001$ & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.063\pm0.001$ & $0.001\pm0.000$ \\
  DMLS & $0.033\pm0.001$ & $0.043\pm0.001$ &   $0.060\pm0.003$ & $0.031\pm0.000$ & $0.001\pm0.000$ \\
    XL &               - & $0.077\pm0.000$ &   $0.077\pm0.000$ & $0.052\pm0.001$ & $0.013\pm0.001$ \\
    CF &               - & $0.068\pm0.000$ &   $0.064\pm0.001$ & $0.064\pm0.000$ & $0.063\pm0.000$ \\
SL-MLP & $0.024\pm0.001$ & $0.038\pm0.001$ &   $0.037\pm0.001$ & $0.024\pm0.003$ & $0.001\pm0.000$ \\
TL-MLP & $0.050\pm0.002$ & $0.064\pm0.00

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['mse_mix'] = df_ate.apply(ate_filter, axis=1)


### Perspective -- base learners

In [11]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_base(df_all, avg_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)
df_pehe = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models, matching_ks)

#df_ate['selection'] = df_ate['ate_rs_lgbm']
#df_pehe['selection'] = df_pehe['pehe_rs_lgbm']

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
#print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))
#print(df_merged[['name', 'ate_test', 'pehe_test']].to_latex(index=False, escape=False))

print(df_merged[['name', 'ate_tl_kr_pehe', 'ate_match_1k_pehe', 'ate_rs_lgbm', 'ate_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
name &  ate_tl_kr_pehe & ate_match_1k_pehe &     ate_rs_lgbm &        ate_test \\
\midrule
  L1 & $0.075\pm0.002$ &   $0.077\pm0.000$ & $0.025\pm0.003$ & $0.022\pm0.000$ \\
  L2 & $0.047\pm0.001$ &   $0.047\pm0.001$ & $0.034\pm0.002$ & $0.030\pm0.001$ \\
  DT & $0.042\pm0.001$ &   $0.050\pm0.002$ & $0.030\pm0.004$ & $0.000\pm0.000$ \\
  RF & $0.052\pm0.001$ &   $0.063\pm0.000$ & $0.037\pm0.003$ & $0.000\pm0.000$ \\
  ET & $0.043\pm0.001$ &   $0.045\pm0.000$ & $0.040\pm0.002$ & $0.000\pm0.000$ \\
  KR & $0.044\pm0.000$ &   $0.045\pm0.000$ & $0.027\pm0.002$ & $0.000\pm0.000$ \\
  CB & $0.043\pm0.003$ &   $0.046\pm0.002$ & $0.043\pm0.001$ & $0.023\pm0.001$ \\
LGBM & $0.025\pm0.001$ &   $0.033\pm0.001$ & $0.026\pm0.001$ & $0.011\pm0.000$ \\
 MLP & $0.038\pm0.001$ &   $0.037\pm0.001$ & $0.024\pm0.003$ & $0.000\pm0.000$ \\
\bottomrule
\end{tabular}



In [12]:
print(df_merged[['name', 'pehe_tl_kr_pehe', 'pehe_match_1k_pehe', 'pehe_rs_lgbm', 'pehe_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
name & pehe_tl_kr_pehe & pehe_match_1k_pehe &    pehe_rs_lgbm &       pehe_test \\
\midrule
  L1 & $0.325\pm0.001$ &    $0.326\pm0.002$ & $0.318\pm0.002$ & $0.317\pm0.002$ \\
  L2 & $0.320\pm0.002$ &    $0.320\pm0.002$ & $0.319\pm0.002$ & $0.318\pm0.002$ \\
  DT & $0.320\pm0.002$ &    $0.321\pm0.002$ & $0.320\pm0.004$ & $0.317\pm0.002$ \\
  RF & $0.321\pm0.002$ &    $0.324\pm0.002$ & $0.320\pm0.003$ & $0.317\pm0.002$ \\
  ET & $0.320\pm0.002$ &    $0.321\pm0.003$ & $0.321\pm0.004$ & $0.318\pm0.002$ \\
  KR & $0.320\pm0.002$ &    $0.320\pm0.002$ & $0.319\pm0.002$ & $0.317\pm0.002$ \\
  CB & $0.320\pm0.002$ &    $0.321\pm0.002$ & $0.321\pm0.002$ & $0.318\pm0.002$ \\
LGBM & $0.318\pm0.002$ &    $0.320\pm0.003$ & $0.319\pm0.003$ & $0.317\pm0.002$ \\
 MLP & $0.320\pm0.002$ &    $0.320\pm0.002$ & $0.319\pm0.003$ & $0.317\pm0.002$ \\
\bottomrule
\end{tabular}



In [11]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_sem_latex_nn.csv')

print(df_all.to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
name &        ate_test &       pehe_test \\
\midrule
 mlp & $0.000\pm0.000$ & $0.317\pm0.002$ \\
\bottomrule
\end{tabular}



### Perspective -- model selection

In [7]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_sem_latex.csv')

df_ate = show_all(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_ate', '_pehe']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_ate', 'all_pehe']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_ate &        all_pehe \\
\midrule
          MSE & $0.039\pm0.000$ & $0.319\pm0.002$ \\
           R2 & $0.039\pm0.000$ & $0.319\pm0.002$ \\
    SL-DT-ATE & $0.016\pm0.005$ & $0.375\pm0.013$ \\
  SL-LGBM-ATE & $0.040\pm0.003$ & $0.412\pm0.023$ \\
    SL-KR-ATE & $0.034\pm0.003$ & $0.446\pm0.024$ \\
    TL-DT-ATE & $0.059\pm0.004$ & $0.404\pm0.019$ \\
  TL-LGBM-ATE & $0.062\pm0.005$ & $0.395\pm0.021$ \\
    TL-KR-ATE & $0.069\pm0.004$ & $0.379\pm0.014$ \\
   SL-DT-PEHE & $0.027\pm0.003$ & $0.318\pm0.002$ \\
 SL-LGBM-PEHE & $0.024\pm0.002$ & $0.317\pm0.002$ \\
   SL-KR-PEHE & $0.022\pm0.000$ & $0.317\pm0.002$ \\
   TL-DT-PEHE & $0.049\pm0.003$ & $0.320\pm0.002$ \\
 TL-LGBM-PEHE & $0.047\pm0.000$ & $0.320\pm0.002$ \\
   TL-KR-PEHE & $0.047\pm0.001$ & $0.320\pm0.002$ \\
 MATCH-1K-ATE & $0.078\pm0.008$ & $0.395\pm0.014$ \\
 MATCH-3K-ATE & $0.078\pm0.005$ & $0.409\pm0.010$ \\
 MATCH-5K-ATE & $0.077\pm0.006$ & $0.411\pm0.008$ \\
MATCH-1