In [1]:
import pandas as pd

In [11]:
def show_est(df, metric, plugin_metric, plugin_models, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2', f'{metric}_mixed'] + plugin_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

def show_base(df, metric, plugin_metric, plugin_models, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for pm in plugin_models]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name'] + plugin_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

def show_all(df, metric, plugin_metrics, plugin_models, matching_ks, rscore_models):
    plugin_cols = [f'{metric}_{pm}_{plugin_metric}' for plugin_metric in plugin_metrics for pm in plugin_models]
    matching_cols = [f'{metric}_match_{k}k_{plugin_metric}' for plugin_metric in plugin_metrics for k in matching_ks]
    rscore_cols = [f'{metric}_rs_{rs_bm}' for rs_bm in rscore_models]
    cols = ['name', f'{metric}_mse', f'{metric}_r2'] + plugin_cols + matching_cols + rscore_cols + [f'{metric}_test']
    return df[cols]

In [3]:
def my_filter(x, metric):
    if x[f'{metric}_mse'] != '-':
        return x[f'{metric}_mse']
    elif x[f'{metric}_mixed'] != '-':
        return x[f'{metric}_mixed']
    else:
        return x[f'{metric}_rs_lgbm']

def ate_filter(x):
    return my_filter(x, 'ate')

def pehe_filter(x):
    return my_filter(x, 'pehe')

def att_filter(x):
    return my_filter(x, 'att')

def policy_filter(x):
    return my_filter(x, 'policy')

## IHDP

In [12]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm', 'cb']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'ihdp'
avg_metric = 'ate'
ite_metric = 'pehe'

### Perspective -- causal estimators

In [10]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_est(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pehe = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
  name &   selection_ate &        ate_test &  selection_pehe &       pehe_test \\
\midrule
    SL & $0.246\pm0.294$ & $0.001\pm0.003$ & $1.373\pm1.836$ & $1.205\pm1.683$ \\
    TL & $0.168\pm0.293$ & $0.000\pm0.000$ & $0.701\pm0.607$ & $0.621\pm0.601$ \\
 IPSWS & $0.131\pm0.101$ & $0.001\pm0.001$ & $1.552\pm2.179$ & $1.204\pm1.680$ \\
   DRS & $0.211\pm0.236$ & $0.002\pm0.002$ & $1.470\pm1.873$ & $1.275\pm1.744$ \\
  DMLS & $0.438\pm0.407$ & $0.007\pm0.010$ & $1.905\pm2.477$ & $1.679\pm2.489$ \\
    XL & $0.270\pm0.404$ & $0.009\pm0.020$ & $1.276\pm1.344$ & $1.067\pm1.226$ \\
    CF & $0.240\pm0.388$ & $0.198\pm0.392$ & $2.295\pm3.556$ & $2.290\pm3.559$ \\
SL-MLP & $0.486\pm0.385$ & $0.104\pm0.113$ & $1.064\pm0.637$ & $0.925\pm0.673$ \\
TL-MLP & $0.221\pm0.229$ & $0.000\pm0.000$ & $0.894\pm0.549$ & $0.641\pm0.570$ \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate.apply(ate_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe.apply(pehe_filter, axis=1)


### Perspective -- base learners

In [11]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_ate = show_base(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pehe = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_ate['selection'] = df_ate['ate_rs_lgbm']
df_pehe['selection'] = df_pehe['pehe_rs_lgbm']

df_merged = df_ate.merge(df_pehe, on=['name'], suffixes=['_ate', '_pehe'])
print(df_merged[['name', 'selection_ate', 'ate_test', 'selection_pehe', 'pehe_test']].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
  name &   selection_ate &        ate_test &  selection_pehe &       pehe_test \\
\midrule
    L1 & $0.340\pm0.388$ & $0.046\pm0.049$ & $1.795\pm2.643$ & $1.643\pm2.651$ \\
    L2 & $0.270\pm0.426$ & $0.161\pm0.336$ & $1.810\pm2.696$ & $1.603\pm2.522$ \\
    DT & $0.399\pm0.605$ & $0.000\pm0.001$ & $2.330\pm3.085$ & $1.890\pm2.797$ \\
    RF & $0.253\pm0.286$ & $0.020\pm0.033$ & $1.909\pm2.905$ & $1.529\pm2.434$ \\
    ET & $0.292\pm0.387$ & $0.003\pm0.006$ & $1.999\pm3.320$ & $1.582\pm2.744$ \\
    KR & $0.282\pm0.313$ & $0.001\pm0.002$ & $1.399\pm1.868$ & $0.653\pm0.586$ \\
    CB & $0.267\pm0.191$ & $0.004\pm0.007$ & $1.453\pm2.099$ & $0.893\pm1.157$ \\
  LGBM & $0.264\pm0.239$ & $0.016\pm0.024$ & $1.881\pm2.720$ & $1.326\pm1.687$ \\
    CF & $0.240\pm0.388$ & $0.198\pm0.392$ & $2.295\pm3.556$ & $2.290\pm3.559$ \\
SL-MLP & $0.342\pm0.201$ & $0.104\pm0.113$ & $1.232\pm0.831$ & $0.925\pm0.673$ \\
TL-MLP & $0.507\pm0.624$ & $0.000\pm0.000$ & $1.399\pm1.4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ate['selection'] = df_ate['ate_rs_lgbm']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pehe['selection'] = df_pehe['pehe_rs_lgbm']


### Perspective -- model selection

In [14]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_latex.csv')

df_ate = show_all(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_ate', '_pehe']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_ate', 'all_pehe']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_ate &        all_pehe \\
\midrule
          MSE & $0.188\pm0.291$ & $0.786\pm0.568$ \\
           R2 & $0.352\pm0.437$ & $0.922\pm0.711$ \\
    SL-DT-ATE & $1.455\pm2.545$ & $3.327\pm4.362$ \\
  SL-LGBM-ATE & $4.791\pm2.711$ & $6.998\pm6.783$ \\
    SL-KR-ATE & $1.430\pm1.823$ & $2.868\pm2.934$ \\
    TL-DT-ATE & $0.201\pm0.172$ & $1.873\pm1.179$ \\
  TL-LGBM-ATE & $0.237\pm0.245$ & $1.983\pm1.302$ \\
    TL-KR-ATE & $0.409\pm0.344$ & $1.796\pm2.069$ \\
   SL-DT-PEHE & $1.655\pm3.808$ & $4.188\pm7.957$ \\
 SL-LGBM-PEHE & $4.791\pm2.711$ & $6.998\pm6.783$ \\
   SL-KR-PEHE & $1.652\pm2.363$ & $4.438\pm6.744$ \\
   TL-DT-PEHE & $0.267\pm0.375$ & $2.605\pm4.298$ \\
 TL-LGBM-PEHE & $0.306\pm0.520$ & $2.357\pm3.041$ \\
   TL-KR-PEHE & $0.209\pm0.144$ & $1.341\pm1.555$ \\
 MATCH-1K-ATE & $0.323\pm0.294$ & $3.199\pm4.756$ \\
 MATCH-3K-ATE & $0.176\pm0.117$ & $1.640\pm1.055$ \\
 MATCH-5K-ATE & $0.209\pm0.194$ & $1.841\pm1.434$ \\
MATCH-1

## JOBS

In [15]:
plugin_meta_models = ['sl', 'tl']
#plugin_base_models = ['dt', 'lgbm']
plugin_base_models = ['dt', 'lgbm', 'kr']
plugin_models = [f'{pmm}_{pbm}' for pmm in plugin_meta_models for pbm in plugin_base_models]
matching_ks = [1, 3, 5]
#rscore_base_models = ['dt', 'lgbm', 'cb']
rscore_base_models = ['dt', 'lgbm', 'kr']

ds = 'jobs'
avg_metric = 'att'
ite_metric = 'policy'

### Perspective -- causal estimators

In [14]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_est_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_est(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pol = show_est(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_att['selection'] = df_att.apply(att_filter, axis=1)
df_pol['selection'] = df_pol.apply(policy_filter, axis=1)

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
  name &   selection_att &        att_test &   selection_pol &     policy_test \\
\midrule
    SL & $0.066\pm0.060$ & $0.003\pm0.004$ & $0.261\pm0.057$ & $0.158\pm0.034$ \\
    TL & $0.074\pm0.069$ & $0.000\pm0.000$ & $0.235\pm0.058$ & $0.128\pm0.036$ \\
 IPSWS & $0.077\pm0.075$ & $0.024\pm0.056$ & $0.245\pm0.038$ & $0.158\pm0.038$ \\
   DRS & $0.075\pm0.069$ & $0.001\pm0.002$ & $0.240\pm0.029$ & $0.149\pm0.030$ \\
  DMLS & $0.078\pm0.070$ & $0.016\pm0.016$ & $0.264\pm0.043$ & $0.193\pm0.035$ \\
    XL & $0.077\pm0.074$ & $0.003\pm0.007$ & $0.233\pm0.057$ & $0.153\pm0.038$ \\
    CF & $0.072\pm0.064$ & $0.053\pm0.067$ & $0.225\pm0.047$ & $0.204\pm0.050$ \\
SL-MLP & $0.068\pm0.066$ & $0.023\pm0.057$ & $0.254\pm0.055$ & $0.162\pm0.029$ \\
TL-MLP & $0.066\pm0.078$ & $0.010\pm0.029$ & $0.226\pm0.041$ & $0.132\pm0.028$ \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_att['selection'] = df_att.apply(att_filter, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pol['selection'] = df_pol.apply(policy_filter, axis=1)


### Perspective -- base learners

In [15]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_meta_base_latex.csv')
df_all['name'] = df_all['name'].apply(lambda x: x.upper().replace('_', '-'))

df_att = show_base(df_all, avg_metric, 'ate', plugin_models, rscore_base_models)
df_pol = show_base(df_all, ite_metric, 'pehe', plugin_models, rscore_base_models)

df_att['selection'] = df_att['att_rs_lgbm']
df_pol['selection'] = df_pol['policy_rs_lgbm']

df_merged = df_att.merge(df_pol, on=['name'], suffixes=['_att', '_pol'])

print(df_merged[['name', 'selection_att', 'att_test', 'selection_pol', 'policy_test']].to_latex(index=False, escape=False))

\begin{tabular}{lllll}
\toprule
  name &   selection_att &        att_test &   selection_pol &     policy_test \\
\midrule
    L1 & $0.093\pm0.072$ & $0.032\pm0.066$ & $0.270\pm0.071$ & $0.197\pm0.040$ \\
    L2 & $0.082\pm0.075$ & $0.061\pm0.068$ & $0.224\pm0.058$ & $0.199\pm0.042$ \\
    DT & $0.062\pm0.058$ & $0.002\pm0.004$ & $0.233\pm0.056$ & $0.142\pm0.031$ \\
    RF & $0.074\pm0.062$ & $0.011\pm0.019$ & $0.239\pm0.050$ & $0.155\pm0.046$ \\
    ET & $0.073\pm0.070$ & $0.014\pm0.025$ & $0.223\pm0.048$ & $0.148\pm0.039$ \\
    KR & $0.085\pm0.078$ & $0.000\pm0.000$ & $0.262\pm0.072$ & $0.141\pm0.026$ \\
    CB & $0.066\pm0.064$ & $0.025\pm0.034$ & $0.212\pm0.046$ & $0.156\pm0.033$ \\
  LGBM & $0.076\pm0.072$ & $0.009\pm0.010$ & $0.221\pm0.045$ & $0.174\pm0.034$ \\
    CF & $0.072\pm0.064$ & $0.053\pm0.067$ & $0.225\pm0.047$ & $0.204\pm0.050$ \\
SL-MLP & $0.075\pm0.076$ & $0.023\pm0.057$ & $0.251\pm0.042$ & $0.162\pm0.029$ \\
TL-MLP & $0.088\pm0.074$ & $0.010\pm0.029$ & $0.216\pm0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_att['selection'] = df_att['att_rs_lgbm']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pol['selection'] = df_pol['policy_rs_lgbm']


### Perspective -- model selection

In [16]:
df_all = pd.read_csv(f'./tables/{ds}_compare_metrics_all_latex.csv')

df_ate = show_all(df_all, avg_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)
df_pehe = show_all(df_all, ite_metric, ['ate', 'pehe'], plugin_models, matching_ks, rscore_base_models)

selection_models = [f'{pmm}_{pbm}_{pm}' for pmm in plugin_meta_models for pbm in plugin_base_models for pm in ['ate', 'pehe']] + [f'match_{k}k_{pm}' for k in matching_ks for pm in ['ate', 'pehe']] + [f'rs_{rbm}' for rbm in rscore_base_models]
d_ate = {f'{avg_metric}_test': 'best', f'{avg_metric}_mse': 'mse', f'{avg_metric}_r2': 'r2'}
d_pehe = {f'{ite_metric}_test': 'best', f'{ite_metric}_mse': 'mse', f'{ite_metric}_r2': 'r2'}
for sm in selection_models:
    d_ate[f'{avg_metric}_{sm}'] = sm
    d_pehe[f'{ite_metric}_{sm}'] = sm

df_ate = df_ate.rename(columns=d_ate)
df_pehe = df_pehe.rename(columns=d_pehe)

df_ate = df_ate.set_index('name').T
df_pehe = df_pehe.set_index('name').T

df_merged = df_ate.merge(df_pehe, left_index=True, right_index=True, suffixes=['_att', '_pol']).reset_index()

df_merged['selection'] = df_merged['index'].apply(lambda x: x.upper().replace('_', '-'))
print(df_merged[['selection', 'all_att', 'all_pol']].to_latex(index=False, escape=False))


\begin{tabular}{lll}
\toprule
    selection &         all_att &         all_pol \\
\midrule
          MSE & $0.077\pm0.075$ & $0.245\pm0.038$ \\
           R2 & $0.072\pm0.065$ & $0.257\pm0.058$ \\
    SL-DT-ATE & $0.080\pm0.079$ & $0.275\pm0.072$ \\
  SL-LGBM-ATE & $0.075\pm0.076$ & $0.267\pm0.046$ \\
    SL-KR-ATE & $0.068\pm0.065$ & $0.233\pm0.070$ \\
    TL-DT-ATE & $0.086\pm0.079$ & $0.245\pm0.052$ \\
  TL-LGBM-ATE & $0.081\pm0.081$ & $0.250\pm0.047$ \\
    TL-KR-ATE & $0.066\pm0.039$ & $0.222\pm0.031$ \\
   SL-DT-PEHE & $0.083\pm0.079$ & $0.297\pm0.060$ \\
 SL-LGBM-PEHE & $0.083\pm0.079$ & $0.296\pm0.060$ \\
   SL-KR-PEHE & $0.076\pm0.072$ & $0.248\pm0.054$ \\
   TL-DT-PEHE & $0.065\pm0.067$ & $0.248\pm0.049$ \\
 TL-LGBM-PEHE & $0.073\pm0.067$ & $0.247\pm0.044$ \\
   TL-KR-PEHE & $0.085\pm0.070$ & $0.244\pm0.045$ \\
 MATCH-1K-ATE & $0.161\pm0.228$ & $0.229\pm0.053$ \\
 MATCH-3K-ATE & $0.158\pm0.230$ & $0.227\pm0.043$ \\
 MATCH-5K-ATE & $0.160\pm0.231$ & $0.236\pm0.063$ \\
MATCH-1