# Paper Tables and Figures
- Goal: Build publication-ready summary tables from saved experiment outputs.
- Flow: Load result files, compute formatted summaries, and export tables.


## Load Results
- Step: Read required simulation and application summary files.
- Check: Validate expected schema before table construction.


In [21]:
import pandas as pd
from pathlib import Path
from IPython.display import display

# Directory layout of the saved experiment outputs, relative to the notebook's
# working directory. Every path used later in the notebook is derived from these.
RESULTS_ROOT = Path('../results')
SIM_ROOT = RESULTS_ROOT / 'simulations'
APP_ROOT = RESULTS_ROOT / 'application'
APP_SUMMARY = APP_ROOT / 'summary'

# resolve() must be called: printing the bare attribute shows a bound-method
# repr instead of the absolute path.
print('RESULTS_ROOT:', RESULTS_ROOT.resolve())

RESULTS_ROOT: /path/to/your/project 


In [22]:
# Registry of every CSV this notebook consumes: short table name -> file path.
TABLE_PATHS = {
    # simulations
    'sim_summary_ab_long': SIM_ROOT / 'summary_ALL.csv',
    'sim_summary_ab_wide': SIM_ROOT / 'summary_ALL_wide.csv',
    'sim_summary_cd_long': SIM_ROOT / 'summary_ALL_CD.csv',
    'sim_placebo_ab_pivot': SIM_ROOT / 'summary_placebo_table.csv',
    'sim_placebo_cd_pivot': SIM_ROOT / 'summary_placebo_table_CD.csv',

    # application
    'app_taskA_all_categories': APP_SUMMARY / 'taskA_model_metrics_all_categories.csv',
    'app_taskA_category_summary': APP_SUMMARY / 'taskA_category_summary.csv',
    'app_taskB_te_all_categories': APP_SUMMARY / 'taskB_placebo_te_summary_all_categories.csv',
    'app_taskB_split_all_categories': APP_SUMMARY / 'taskB_placebo_split_metrics_all_categories.csv',
}

# items() must be called; iterating the bare method raises TypeError.
for k, v in TABLE_PATHS.items():
    print(f'{k:30s} -> {v}')

sim_summary_ab_long   ->../results/simulations/summary_ALL.csv
sim_summary_ab_wide   ->../results/simulations/summary_ALL_wide.csv
sim_summary_cd_long   ->../results/simulations/summary_ALL_CD.csv
sim_placebo_ab_pivot   ->../results/simulations/summary_placebo_table.csv
sim_placebo_cd_pivot   ->../results/simulations/summary_placebo_table_CD.csv
app_taskA_all_categories  ->../results/application/summary/taskA_model_metrics_all_categories.csv
app_taskA_category_summary  ->../results/application/summary/taskA_category_summary.csv
app_taskB_te_all_categories ->../results/application/summary/taskB_placebo_te_summary_all_categories.csv
app_taskB_split_all_categories ->../results/application/summary/taskB_placebo_split_metrics_all_categories.csv


In [23]:
def load_csv(path: Path) -> pd.DataFrame:
    """Read one result CSV into a DataFrame.

    Args:
        path: Location of the CSV file to load.

    Returns:
        The parsed file contents, as produced by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If ``path`` does not exist. The offending path is the
            exception's only argument.
    """
    # exists() must be called: the bare bound method is always truthy, which
    # made this guard unreachable.
    if not path.exists():
        raise FileNotFoundError(path)
    return pd.read_csv(path)

# Load every registered CSV up front; load_csv raises if any file is missing.
TABLES = {name: load_csv(path) for name, path in TABLE_PATHS.items()}

# One status row per table (row count, column count, column names) so the
# loaded schema can be checked by eye before any table is built.
status_rows = []
for name, df in TABLES.items():
    status_rows.append({
        'table': name,
        'rows': int(df.shape[0]),
        'cols': int(df.shape[1]),
        # tolist() must be called: str.join needs an iterable of strings.
        'columns': ', '.join(df.columns.astype(str).tolist()),
    })

status_df = pd.DataFrame(status_rows).sort_values('table').reset_index(drop=True)
display(status_df)

Unnamed: 0,table,rows,cols,columns
0,app_taskA_all_categories,9,19,"category, model, N, K1, K2, K3, T_pre, mu, rms..."
1,app_taskA_category_summary,3,4,"category, best_model_by_rmse, best_rmse_fit, m..."
2,app_taskB_split_all_categories,9,8,"category, model, Train_MAE, Train_RMSE, Placeb..."
3,app_taskB_te_all_categories,9,12,"category, model, tau_mae_avg_over_cohorts, tau..."
4,sim_placebo_ab_pivot,30,4,"model, metric, A, B"
5,sim_placebo_cd_pivot,30,4,"model, metric, C, D"
6,sim_summary_ab_long,174,7,"scenario, task, model, effect_type, metric, me..."
7,sim_summary_ab_wide,33,20,"scenario, task, model, effect_type, Bias, CI_W..."
8,sim_summary_cd_long,156,7,"scenario, task, model, effect_type, metric, me..."


In [24]:
# Show the first three rows of every loaded table, in alphabetical order of table name.
for table_name in sorted(TABLES.keys()):
    banner = f'\n=== {table_name} ==='
    print(banner)
    head_rows = TABLES[table_name].head(3)
    display(head_rows)


=== app_taskA_all_categories ===


Unnamed: 0,category,model,N,K1,K2,K3,T_pre,mu,rmse_fit,mae_fit,coverage,ci_width_mean,ci_width_std,ci_width_alpha_mean,ci_width_alpha_std,ci_width_beta_mean,ci_width_beta_std,resid_mean,resid_std
0,gym_instore,FE+AR,1767,65,38,65,37,3.031322,0.129003,0.098838,0.956423,0.106202,0.062529,0.077471,0.00928,0.122998,0.073353,-3.196839e-16,0.129003
1,gym_instore,GP-CP,1767,65,38,65,37,3.029024,0.13098,0.099514,0.958121,0.096924,0.060279,0.058288,0.007545,0.119512,0.065891,0.0001502922,0.13098
2,gym_instore,GP-CP-Extended,1767,65,38,65,37,3.022885,0.053565,0.037638,0.938879,0.113449,0.084636,0.083571,0.024193,0.130915,0.100906,-7.575341e-06,0.053565



=== app_taskA_category_summary ===


Unnamed: 0,category,best_model_by_rmse,best_rmse_fit,metrics_csv
0,gym_instore,GP-CP-Extended,0.053565,../results/application/gym_instore/tables/task...
1,mealkit,GP-CP-Extended,0.036293,../results/application/mealkit/tables/taskA_mo...
2,streaming,GP-CP-Extended,0.03011,../results/application/streaming/tables/taskA_...



=== app_taskB_split_all_categories ===


Unnamed: 0,category,model,Train_MAE,Train_RMSE,PlaceboTest_MAE,PlaceboTest_RMSE,Residual_mean,Residual_SD
0,gym_instore,FE+AR,0.080273,0.107318,0.182489,0.244187,-0.018385,0.186396
1,gym_instore,GP-CP,0.085552,0.112358,0.430521,0.479549,-0.200035,0.281183
2,gym_instore,GP-CP-Extended,0.039738,0.057592,0.208654,0.253926,-0.083663,0.162092



=== app_taskB_te_all_categories ===


Unnamed: 0,category,model,tau_mae_avg_over_cohorts,tau_rmse_avg_over_cohorts,tau_coverage_avg_over_cohorts,tau_ci_width_avg_over_cohorts,tau_ci_width_std_over_cohorts,att_avg_over_t,att_mae_vs_zero,att_rmse_vs_zero,att_coverage_vs_zero,att_ci_width_avg_over_t
0,gym_instore,FE+AR,0.069581,0.093073,0.924168,0.424536,0.299167,-0.037535,0.037535,0.037535,1.0,0.157078
1,gym_instore,GP-CP,0.161614,0.169609,0.817457,0.467618,0.27533,-0.408201,0.408201,0.408201,0.0,0.164757
2,gym_instore,GP-CP-Extended,0.071766,0.082775,0.971933,0.69459,0.206896,-0.170699,0.170699,0.170699,0.0,0.16543



=== sim_placebo_ab_pivot ===


Unnamed: 0,model,metric,A,B
0,FE+AR,att_avg_over_t,0.131752,0.116171
1,FE+AR,att_ci_width_avg_over_t,0.748568,0.773673
2,FE+AR,att_coverage_vs_zero,0.2,0.2



=== sim_placebo_cd_pivot ===


Unnamed: 0,model,metric,C,D
0,FE+AR,att_avg_over_t,-0.0903,-0.0903
1,FE+AR,att_ci_width_avg_over_t,0.290025,0.603712
2,FE+AR,att_coverage_vs_zero,0.4,0.8



=== sim_summary_ab_long ===


Unnamed: 0,scenario,task,model,effect_type,metric,mean,std
0,A,extrapolation,FE+AR,beta,CI_Width,3.328387,0.398898
1,A,extrapolation,FE+AR,beta,Coverage,0.88,0.086923
2,A,extrapolation,FE+AR,beta,MAE,0.868051,0.159018



=== sim_summary_ab_wide ===


Unnamed: 0,scenario,task,model,effect_type,Bias,CI_Width,Coverage,MAE,MAPE,RMSE,att_avg_over_t,att_ci_width_avg_over_t,att_coverage_vs_zero,att_mae_vs_zero,att_rmse_vs_zero,tau_ci_width_avg_over_cohorts,tau_ci_width_std_over_cohorts,tau_coverage_avg_over_cohorts,tau_mae_avg_over_cohorts,tau_rmse_avg_over_cohorts
0,A,extrapolation,FE+AR,beta,,3.328387,0.88,0.868051,709.85519,1.050242,,,,,,,,,,
1,A,extrapolation,GP-CP,beta,,2.908283,0.973333,0.514995,237.251812,0.628349,,,,,,,,,,
2,A,extrapolation,GP-CP-Extended,beta,,2.881546,0.973333,0.508349,232.756969,0.623207,,,,,,,,,,



=== sim_summary_cd_long ===


Unnamed: 0,scenario,task,model,effect_type,metric,mean,std
0,C,fit,FE+AR,alpha,Bias,-3.885781e-18,6.926844000000001e-17
1,C,fit,FE+AR,alpha,CI_Width,0.1487692,0.004170626
2,C,fit,FE+AR,alpha,Coverage,0.95,0.03535534


## Setup
- Step: Define output directories and helper formatting functions.
- Output: Reusable utilities for table generation.


## Simulation Summary Tables
- Step: Build model-comparison tables from simulation summaries.
- Save: CSV and formatted outputs for reporting.


In [25]:
from pathlib import Path
import numpy as np
import pandas as pd

# Output directory for all simulation summary tables; created if absent.
OUT_SIM_SUMMARY = RESULTS_ROOT / 'summary_simulations'
OUT_SIM_SUMMARY.mkdir(parents=True, exist_ok=True)

# 1) Base fit summaries (already mean/std over seeds) from combined A/B + C/D
# copy() must be called: pd.concat needs DataFrames, not bound methods.
fit_ab = TABLES['sim_summary_ab_long'].copy()
fit_cd = TABLES['sim_summary_cd_long'].copy()
fit_all = pd.concat([fit_ab, fit_cd], ignore_index=True)

# Keep only rows of the 'fit' task for the alpha and beta effects.
is_fit_effect = (fit_all['task'] == 'fit') & fit_all['effect_type'].isin(['alpha', 'beta'])
fit_all = fit_all[is_fit_effect].copy()

# 2) Compute MAE for fit from per-seed fit tables (not in existing fit summary CSVs)
def _seed_fit_path(scenario: str, seed: int) -> Path:
    """Build the path of the per-seed fit CSV for one scenario.

    The result is ``SIM_ROOT/<scenario>/<seed>/scen<scenario>_fit_seed<seed>.csv``.
    """
    seed_dir = SIM_ROOT / scenario / str(seed)
    file_name = f'scen{scenario}_fit_seed{seed}.csv'
    return seed_dir / file_name

# Per-seed MAE of the fitted alpha/beta effects, one small frame per (scenario, seed).
mae_rows = []
for scenario in ['A', 'B', 'C', 'D']:
    for seed in [0, 1, 2, 3, 4]:
        fp = _seed_fit_path(scenario, seed)
        # Seeds with no saved fit file are skipped. exists() must be called:
        # the bare bound method is always truthy, so the skip never happened
        # and pd.read_csv raised on the first missing file.
        if not fp.exists():
            continue
        d = pd.read_csv(fp)
        d = d[d['effect_type'].isin(['alpha', 'beta'])].copy()
        # Absolute error of the estimate ('mean' column) against the 'truth' column.
        d['ae'] = (d['mean'] - d['truth']).abs()
        g = d.groupby(['model', 'effect_type'], as_index=False)['ae'].mean()
        g = g.rename(columns={'ae': 'value'})
        g['scenario'] = scenario
        g['task'] = 'fit'
        g['metric'] = 'MAE'
        g['seed'] = seed
        mae_rows.append(g[['scenario', 'task', 'model', 'effect_type', 'metric', 'value', 'seed']])

# Collapse the per-seed values to mean/std so the rows share the layout of
# fit_all and can be appended to it. Skipped when no per-seed file was found.
if mae_rows:
    mae_seed = pd.concat(mae_rows, ignore_index=True)
    mae_summary = mae_seed.groupby(
        ['scenario', 'task', 'model', 'effect_type', 'metric'], as_index=False
    )['value'].agg(mean='mean', std='std')
    fit_all = pd.concat([fit_all, mae_summary], ignore_index=True)

# 3) Keep requested metrics and format mean/std strings
wanted_metrics = ['MAE', 'RMSE', 'Coverage', 'CI_Width']
tab1 = fit_all[fit_all['metric'].isin(wanted_metrics)].copy()

# Each cell of the final table is a single "mean (std)" string with 4 decimals.
tab1['metric_stat'] = tab1['metric'].astype(str) + '_mean_std'
tab1['value'] = tab1.apply(lambda r: f"{r['mean']:.4f} ({r['std']:.4f})", axis=1)

# Long -> wide: one row per (model, scenario, effect), one column per metric.
# aggfunc='first' is needed because the values are strings, not numbers.
tab1_wide = (
    tab1.pivot_table(
        index=['model', 'scenario', 'effect_type'],
        columns='metric_stat',
        values='value',
        aggfunc='first'
    )
    .reset_index()
)

# enforce order
model_order = ['FE+AR', 'GP-CP', 'GP-CP-Extended']
scenario_order = ['A', 'B', 'C', 'D']
effect_order = ['alpha', 'beta']

# Ordered categoricals make sort_values follow the lists above, not alphabetical order.
tab1_wide['model'] = pd.Categorical(tab1_wide['model'], categories=model_order, ordered=True)
tab1_wide['scenario'] = pd.Categorical(tab1_wide['scenario'], categories=scenario_order, ordered=True)
tab1_wide['effect_type'] = pd.Categorical(tab1_wide['effect_type'], categories=effect_order, ordered=True)

tab1_wide = tab1_wide.sort_values(['model', 'scenario', 'effect_type']).reset_index(drop=True)

# 4) Print subtables by model (rows: scenario x effect)
subtables = {}
for m in model_order:
    sub = tab1_wide[tab1_wide['model'] == m].copy()
    sub = sub.drop(columns=['model'])
    subtables[m] = sub
    print(f"\n=== Table 1 Subtable: {m} ===")
    display(sub)

# 5) Save outputs: the combined table first, then one CSV per model.
all_models_csv = OUT_SIM_SUMMARY / 'table1_model_fitting_comparison_all_models.csv'
tab1_wide.to_csv(all_models_csv, index=False)
print('Wrote:', all_models_csv)

for m, sub in subtables.items():
    # Filesystem-friendly model tag, e.g. 'GP-CP-Extended' -> 'gp_cp_extended'.
    safe = m.replace('+', 'plus').replace('-', '_').lower()
    model_csv = OUT_SIM_SUMMARY / f'table1_model_fitting_comparison_{safe}.csv'
    sub.to_csv(model_csv, index=False)
    print('Wrote:', model_csv)


=== Table 1 Subtable: FE+AR ===


metric_stat,scenario,effect_type,CI_Width_mean_std,Coverage_mean_std,MAE_mean_std,RMSE_mean_std
0,A,alpha,0.1480 (0.0025),0.9300 (0.0570),0.0292 (0.0020),0.0372 (0.0041)
1,A,beta,0.2608 (0.0045),0.9667 (0.0264),0.0498 (0.0057),0.0624 (0.0072)
2,B,alpha,0.4679 (0.0185),1.0000 (0.0000),0.0268 (0.0043),0.0325 (0.0049)
3,B,beta,0.8245 (0.0326),1.0000 (0.0000),0.0514 (0.0034),0.0643 (0.0039)
4,C,alpha,0.1488 (0.0042),0.9500 (0.0354),0.0291 (0.0052),0.0371 (0.0068)
5,C,beta,0.2622 (0.0073),0.9633 (0.0274),0.0516 (0.0040),0.0641 (0.0064)
6,D,alpha,1.0799 (0.1052),1.0000 (0.0000),0.0291 (0.0052),0.0371 (0.0068)
7,D,beta,1.9030 (0.1854),1.0000 (0.0000),0.0516 (0.0040),0.0641 (0.0064)



=== Table 1 Subtable: GP-CP ===


metric_stat,scenario,effect_type,CI_Width_mean_std,Coverage_mean_std,MAE_mean_std,RMSE_mean_std
8,A,alpha,0.1217 (0.0061),0.9400 (0.0418),0.0262 (0.0028),0.0325 (0.0037)
9,A,beta,0.2008 (0.0059),0.9667 (0.0264),0.0402 (0.0027),0.0491 (0.0037)
10,B,alpha,0.3418 (0.0160),1.0000 (0.0000),0.0380 (0.0057),0.0451 (0.0067)
11,B,beta,0.5267 (0.0137),1.0000 (0.0000),0.0697 (0.0125),0.0868 (0.0137)
12,C,alpha,0.0928 (0.0034),0.9200 (0.0908),0.0200 (0.0042),0.0244 (0.0051)
13,C,beta,0.1548 (0.0092),0.9767 (0.0149),0.0290 (0.0021),0.0358 (0.0030)
14,D,alpha,0.5492 (0.0481),1.0000 (0.0000),0.0614 (0.0060),0.0723 (0.0063)
15,D,beta,0.7044 (0.0359),0.9833 (0.0373),0.1428 (0.0374),0.1733 (0.0434)



=== Table 1 Subtable: GP-CP-Extended ===


metric_stat,scenario,effect_type,CI_Width_mean_std,Coverage_mean_std,MAE_mean_std,RMSE_mean_std
16,A,alpha,0.1128 (0.0081),0.9300 (0.0570),0.0262 (0.0026),0.0325 (0.0035)
17,A,beta,0.1866 (0.0143),0.9500 (0.0391),0.0402 (0.0032),0.0492 (0.0045)
18,B,alpha,0.1223 (0.0044),0.9600 (0.0418),0.0228 (0.0043),0.0281 (0.0051)
19,B,beta,0.2014 (0.0029),0.9400 (0.0480),0.0406 (0.0071),0.0511 (0.0091)
20,C,alpha,0.0881 (0.0090),0.9100 (0.0894),0.0200 (0.0042),0.0245 (0.0051)
21,C,beta,0.1475 (0.0174),0.9600 (0.0303),0.0291 (0.0021),0.0360 (0.0031)
22,D,alpha,0.0927 (0.0038),0.9300 (0.0837),0.0200 (0.0042),0.0244 (0.0051)
23,D,beta,0.1548 (0.0088),0.9833 (0.0167),0.0291 (0.0021),0.0358 (0.0030)


Wrote:../results/summary_simulations/table1_model_fitting_comparison_all_models.csv
Wrote:../results/summary_simulations/table1_model_fitting_comparison_feplusar.csv
Wrote:../results/summary_simulations/table1_model_fitting_comparison_gp_cp.csv
Wrote:../results/summary_simulations/table1_model_fitting_comparison_gp_cp_extended.csv


### Extrapolation Table
- Step: Summarize extrapolation performance metrics by model.
- Save: Final extrapolation comparison table.


In [26]:
# Output directory for simulation summary tables; re-declared here and created if absent.
OUT_SIM_SUMMARY = RESULTS_ROOT / 'summary_simulations'
OUT_SIM_SUMMARY.mkdir(parents=True, exist_ok=True)

# Combine the A/B and C/D long-format summaries into one frame.
# copy() must be called: pd.concat needs DataFrames, not bound methods.
sim_ab = TABLES['sim_summary_ab_long'].copy()
sim_cd = TABLES['sim_summary_cd_long'].copy()
sim_all = pd.concat([sim_ab, sim_cd], ignore_index=True)

# Extrapolation task, beta effect only, restricted to the four reported metrics.
ex = sim_all[(sim_all['task'] == 'extrapolation') & (sim_all['effect_type'] == 'beta')].copy()
ex = ex[ex['metric'].isin(['MAE', 'RMSE', 'Coverage', 'CI_Width'])].copy()

# Each cell of the final table is a single "mean (std)" string with 4 decimals.
ex['metric_col'] = ex['metric'].astype(str) + '_mean_std'
ex['value'] = ex.apply(lambda r: f"{r['mean']:.4f} ({r['std']:.4f})", axis=1)

# Long -> wide: one row per (scenario, model), one column per metric.
# aggfunc='first' is needed because the values are strings, not numbers.
table2 = (
    ex.pivot_table(
        index=['scenario', 'model'],
        columns='metric_col',
        values='value',
        aggfunc='first'
    )
    .reset_index()
)

scenario_order = ['A', 'B', 'C', 'D']
model_order = ['FE+AR', 'GP-CP', 'GP-CP-Extended']

# Ordered categoricals make sort_values follow the lists above, not alphabetical order.
table2['scenario'] = pd.Categorical(table2['scenario'], scenario_order, ordered=True)
table2['model'] = pd.Categorical(table2['model'], model_order, ordered=True)
table2 = table2.sort_values(['scenario', 'model']).reset_index(drop=True)

print('Table 2 preview:')
display(table2)

table2.to_csv(OUT_SIM_SUMMARY / 'table2_extrapolation_comparison_all_models.csv', index=False)
print('Wrote:', OUT_SIM_SUMMARY / 'table2_extrapolation_comparison_all_models.csv')

Table 2 preview:


metric_col,scenario,model,CI_Width_mean_std,Coverage_mean_std,MAE_mean_std,RMSE_mean_std
0,A,FE+AR,3.3284 (0.3989),0.8800 (0.0869),0.8681 (0.1590),1.0502 (0.1651)
1,A,GP-CP,2.9083 (0.3285),0.9733 (0.0596),0.5150 (0.1281),0.6283 (0.1750)
2,A,GP-CP-Extended,2.8815 (0.3403),0.9733 (0.0596),0.5083 (0.1272),0.6232 (0.1734)
3,B,FE+AR,3.2738 (0.4840),0.8933 (0.1211),0.8767 (0.1458),1.0512 (0.1525)
4,B,GP-CP,2.9199 (0.4585),0.9600 (0.0596),0.5790 (0.1813),0.6833 (0.2267)
5,B,GP-CP-Extended,2.8232 (0.4260),0.9467 (0.0558),0.5650 (0.1600),0.6747 (0.2020)
6,C,FE+AR,1.2478 (0.1077),0.8267 (0.1382),0.4056 (0.0634),0.4537 (0.0787)
7,C,GP-CP,1.0390 (0.0792),0.8667 (0.2309),0.2936 (0.0650),0.3328 (0.0648)
8,C,GP-CP-Extended,1.0146 (0.0830),0.8267 (0.2191),0.2991 (0.0530),0.3392 (0.0497)
9,D,FE+AR,1.2478 (0.1077),0.8267 (0.1382),0.4056 (0.0634),0.4537 (0.0787)


Wrote:../results/summary_simulations/table2_extrapolation_comparison_all_models.csv


### Placebo Table
- Step: Summarize placebo metrics by model.
- Save: Final placebo comparison table.


In [27]:
# Combine the A/B and C/D long-format summaries into one frame.
# copy() must be called: pd.concat needs DataFrames, not bound methods.
sim_ab = TABLES['sim_summary_ab_long'].copy()
sim_cd = TABLES['sim_summary_cd_long'].copy()
sim_all = pd.concat([sim_ab, sim_cd], ignore_index=True)

pl = sim_all[sim_all['task'] == 'placebo_te'].copy()

# Raw metric name -> (estimand, reported metric). The raw names encode both
# pieces; splitting them lets the table pivot on the metric alone.
metric_map = {
    'tau_mae_avg_over_cohorts': ('tau_t', 'MAE'),
    'tau_rmse_avg_over_cohorts': ('tau_t', 'RMSE'),
    'tau_coverage_avg_over_cohorts': ('tau_t', 'Coverage'),
    'tau_ci_width_avg_over_cohorts': ('tau_t', 'CI_Width'),
    'att_mae_vs_zero': ('ATT', 'MAE'),
    'att_rmse_vs_zero': ('ATT', 'RMSE'),
    'att_coverage_vs_zero': ('ATT', 'Coverage'),
    'att_ci_width_avg_over_t': ('ATT', 'CI_Width'),
}

# isin needs a list-like of the keys; passing the bare .keys method raises TypeError.
pl = pl[pl['metric'].isin(list(metric_map))].copy()
pl['estimand'] = pl['metric'].map(lambda x: metric_map[x][0])
pl['metric_std'] = pl['metric'].map(lambda x: metric_map[x][1])
pl['metric_col'] = pl['metric_std'].astype(str) + '_mean_std'
# Each cell of the final table is a single "mean (std)" string with 4 decimals.
pl['value'] = pl.apply(lambda r: f"{r['mean']:.4f} ({r['std']:.4f})", axis=1)

# Long -> wide: one row per (scenario, estimand, model), one column per metric.
# aggfunc='first' is needed because the values are strings, not numbers.
table3 = (
    pl.pivot_table(
        index=['scenario', 'estimand', 'model'],
        columns='metric_col',
        values='value',
        aggfunc='first'
    )
    .reset_index()
)

scenario_order = ['A', 'B', 'C', 'D']
estimand_order = ['tau_t', 'ATT']
model_order = ['FE+AR', 'GP-CP', 'GP-CP-Extended']

# Ordered categoricals make sort_values follow the lists above, not alphabetical order.
table3['scenario'] = pd.Categorical(table3['scenario'], scenario_order, ordered=True)
table3['estimand'] = pd.Categorical(table3['estimand'], estimand_order, ordered=True)
table3['model'] = pd.Categorical(table3['model'], model_order, ordered=True)
table3 = table3.sort_values(['scenario', 'estimand', 'model']).reset_index(drop=True)

print('Table 3 preview:')
display(table3)

# NOTE: OUT_SIM_SUMMARY is not defined in this cell; it comes from an earlier cell.
table3.to_csv(OUT_SIM_SUMMARY / 'table3_placebo_comparison_all_models.csv', index=False)
print('Wrote:', OUT_SIM_SUMMARY / 'table3_placebo_comparison_all_models.csv')

Table 3 preview:


metric_col,scenario,estimand,model,CI_Width_mean_std,Coverage_mean_std,MAE_mean_std,RMSE_mean_std
0,A,tau_t,FE+AR,1.1714 (0.1035),0.9665 (0.0210),0.2805 (0.0440),0.2832 (0.0434)
1,A,tau_t,GP-CP,0.9872 (0.0797),0.9933 (0.0149),0.1387 (0.0312),0.1416 (0.0318)
2,A,tau_t,GP-CP-Extended,1.2178 (0.1416),0.9933 (0.0149),0.1632 (0.0442),0.1739 (0.0499)
3,A,ATT,FE+AR,0.7486 (0.0889),0.2000 (0.4472),0.6664 (0.3899),0.6664 (0.3899)
4,A,ATT,GP-CP,0.6535 (0.0732),1.0000 (0.0000),0.0093 (0.0034),0.0093 (0.0034)
5,A,ATT,GP-CP-Extended,0.6660 (0.0690),1.0000 (0.0000),0.0091 (0.0038),0.0091 (0.0038)
6,B,tau_t,FE+AR,1.8939 (0.1469),0.9728 (0.0252),0.3236 (0.0267),0.3448 (0.0272)
7,B,tau_t,GP-CP,1.4434 (0.1218),0.9940 (0.0095),0.2197 (0.0541),0.2376 (0.0567)
8,B,tau_t,GP-CP-Extended,2.0263 (0.0810),0.9918 (0.0057),0.2335 (0.0352),0.2754 (0.0363)
9,B,ATT,FE+AR,0.7737 (0.1049),0.2000 (0.4472),0.6508 (0.4119),0.6508 (0.4119)


Wrote:../results/summary_simulations/table3_placebo_comparison_all_models.csv


## Application Summary Tables
- Step: Build real-data summary tables across categories.
- Save: Final application comparison outputs.


In [28]:
# Output directory for application summary tables; created if absent.
OUT_APP_SUMMARY = RESULTS_ROOT / 'summary_applications'
OUT_APP_SUMMARY.mkdir(parents=True, exist_ok=True)

# copy() must be called: without it app_fit is a bound method and the column
# selection below raises TypeError.
app_fit = TABLES['app_taskA_all_categories'].copy()

# Raw category identifiers -> display labels used in the paper table.
cat_map = {
    'gym_instore': 'Gym Instore',
    'mealkit': 'Mealkit',
    'streaming': 'Streaming Services',
}

table4 = app_fit[[
    'category', 'model', 'mae_fit', 'rmse_fit', 'coverage', 'ci_width_mean'
]].copy()

# Categories missing from cat_map keep their raw identifier.
table4['category'] = table4['category'].map(cat_map).fillna(table4['category'])

table4 = table4.rename(columns={
    'category': 'Category',
    'model': 'Model',
    'mae_fit': 'MAE',
    'rmse_fit': 'RMSE',
    'coverage': 'Coverage',
    'ci_width_mean': 'CI_Width',
})

cat_order = ['Gym Instore', 'Mealkit', 'Streaming Services']
model_order = ['FE+AR', 'GP-CP', 'GP-CP-Extended']

# Ordered categoricals make sort_values follow the lists above, not alphabetical order.
# NOTE: any label absent from cat_order / model_order becomes NaN at this step.
table4['Category'] = pd.Categorical(table4['Category'], categories=cat_order, ordered=True)
table4['Model'] = pd.Categorical(table4['Model'], categories=model_order, ordered=True)

table4 = table4.sort_values(['Category', 'Model']).reset_index(drop=True)

# Force the metric columns to float.
for c in ['MAE', 'RMSE', 'Coverage', 'CI_Width']:
    table4[c] = table4[c].astype(float)

print('Table 4 preview:')
display(table4)

out_csv = OUT_APP_SUMMARY / 'table4_application_full_fitting_comparison.csv'
table4.to_csv(out_csv, index=False)
print('Wrote:', out_csv)

Table 4 preview:


Unnamed: 0,Category,Model,MAE,RMSE,Coverage,CI_Width
0,Gym Instore,FE+AR,0.098838,0.129003,0.956423,0.106202
1,Gym Instore,GP-CP,0.099514,0.13098,0.958121,0.096924
2,Gym Instore,GP-CP-Extended,0.037638,0.053565,0.938879,0.113449
3,Mealkit,FE+AR,0.168831,0.240675,0.95416,0.198136
4,Mealkit,GP-CP,0.175056,0.247704,0.955857,0.122874
5,Mealkit,GP-CP-Extended,0.028946,0.036293,0.953028,0.269776
6,Streaming Services,FE+AR,0.039187,0.05648,0.934352,0.046497
7,Streaming Services,GP-CP,0.03944,0.05666,0.934352,0.045222
8,Streaming Services,GP-CP-Extended,0.019855,0.03011,0.95133,0.04659


Wrote:../results/summary_applications/table4_application_full_fitting_comparison.csv
