# Real-Data Experiments
- Goal: Run model fitting and placebo analysis for each dataset category.
- Flow: Load input spec, validate data, run Task A and Task B, save outputs.


## Notebook Structure
- Sections: Input spec, runtime config, data validation, fitting, and placebo.
- Output: Category-specific tables, figures, and metadata under `results/application`.


In [1]:
import sys
import json
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

from src.data import (
 parse_category_path_spec,
 prepare_real_data_monthly,
 make_real_data_qa,
)
from src.models import FixedEffectsModel, CohortPeriodModel, CohortPeriodExtendedModel, run_placebo_test
from src.visualization import *

print('Imports successful!')

from src.evaluation import build_support_aware_model_time_summary


Imports successful!


## Input Spec
- Format: One `{category}:{path}` entry per line.
- Step: Select a default category for the single-dataset cells; the Task A and Task B loops then run the same pipeline for every listed dataset.


In [2]:

# Read the input spec and pick the dataset used by the single-category cells.
# `gym_instore` is preferred when it is listed; otherwise the first entry is used.
INPUT_SPEC_PATH = Path('../results/application/input_data_paths.txt')
entries = parse_category_path_spec(INPUT_SPEC_PATH)
if not entries:
    # Fail with a readable message instead of an IndexError on entries[0].
    raise ValueError(f'No category:path entries found in {INPUT_SPEC_PATH}')

default_category = 'gym_instore'
selected = next(
    ((cat, pth) for cat, pth in entries if cat == default_category),
    entries[0],
)

# Run configuration shared by every cell below.
category, data_path = selected
time_level = 'month'
dv = 'spend_normalized'
start_date = pd.to_datetime('2017-01-01')
end_date = pd.to_datetime('2022-06-12')
covid_onset = pd.to_datetime('2020-03-01')

print(f'Input spec: {INPUT_SPEC_PATH}')
print('Available entries:')
for cat, pth in entries:
    print(f' - {cat}: {pth}')
print('\nSelected default run:')
print(f' category : {category}')
print(f' data_path : {data_path}')
print(f' time_level : {time_level}')
print(f' dv   : {dv}')
# Timestamp.date is a method: call it so the calendar date is printed,
# not the bound-method repr.
print(f' window  : {start_date.date()} -> {end_date.date()}')
print(f' covid_onset: {covid_onset.date()}')


Input spec:../results/application/input_data_paths.txt 
Available entries:
 - gym_instore:/path/to/your/data.csv 
 - mealkit:/path/to/your/data.csv 
 - streaming:/path/to/your/data.csv 

Selected default run:
 category : gym_instore
 data_path : /path/to/your/project 
 time_level : month
 dv   : spend_normalized
 window  : 2017-01-01 -> 2022-06-12
 covid_onset: 2020-03-01


## Runtime Config and Output Layout
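- Step: Set dates, target variable, and output directories. The dates and target variable are defined in the Input Spec cell above; per-category output directories are created by the cells that follow.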
- Output: Consistent file layout for downstream tables and plots.


## Data Loader and Validation
- Step: Load data, apply required checks, and build cohort-period panel inputs.
- Output: Validated panel data for model runs.


In [3]:
# Preprocess the selected dataset, print the QA summary, and save
# reproducibility artifacts (compact CSV + QA JSON) for the selected category.
cohort_df, stan_data, step_counts = prepare_real_data_monthly(
    data_path=data_path,
    dv=dv,
    start_date=start_date,
    end_date=end_date,
    covid_onset=covid_onset,
)

qa = make_real_data_qa(cohort_df, stan_data, step_counts)

print('Preprocessing complete.')
print(f" N : {qa['N']}")
print(f" K1 : {qa['K1']}")
print(f" K2 : {qa['K2']}")
print(f" K3 : {qa['K3']}")
print(f" T_pre: {qa['T_pre']}")

print('\nIndex ranges:')
# dict.items must be called; iterating the bound method raises TypeError.
for k, v in qa['index_ranges'].items():
    print(f' {k}: {v[0]}.. {v[1]}')

print('\nPre/post split validity:')
print(f" n_post_rows  : {qa['post_split']['n_post_rows']}")
print(f" post_start_time_idx: {qa['post_split']['post_start_time_idx']}")

print('\nDropped-row summary by step:')
for k, v in qa['dropped_rows'].items():
    print(f' {k}: {v}')

# Output layout for the selected category.
category_root = Path('../results/application') / category
tables_dir = category_root / 'tables'
metadata_dir = category_root / 'metadata'
tables_dir.mkdir(parents=True, exist_ok=True)
metadata_dir.mkdir(parents=True, exist_ok=True)

# Keep only the columns that actually exist in the preprocessed frame.
compact_cols = [
    c for c in [
        'month', 'cohort', 'cohort_month', dv, 'dv_log',
        'cohort_time_idx', 'cohort_idx', 'time_idx', 'is_post'
    ] if c in cohort_df.columns
]
compact_path = tables_dir / 'preprocessed_compact.csv'
qa_path = metadata_dir / 'preprocess_qa.json'

cohort_df[compact_cols].to_csv(compact_path, index=False)
# default=str serializes any value json cannot encode natively.
qa_path.write_text(json.dumps(qa, indent=2, default=str))

print('\nSaved reproducibility artifacts:')
print(f' - {compact_path}')
print(f' - {qa_path}')

# Call head() so the cell displays the first rows rather than the method repr.
cohort_df.head()


Preprocessing complete.
 N : 1767
 K1 : 65
 K2 : 38
 K3 : 65
 T_pre: 37

Index ranges:
 cohort_time_idx: 0.. 64
 cohort_idx: 0.. 37
 time_idx: 0.. 64

Pre/post split validity:
 n_post_rows  : 1064
 post_start_time_idx: 37

Dropped-row summary by step:
 dropna_required: 0
 outside_date_window: 157
 cohort_not_pre_covid: 1198
 non_positive_cohort_age: 38

Saved reproducibility artifacts:
 -../results/application/gym_instore/tables/preprocessed_compact.csv
 -../results/application/gym_instore/metadata/preprocess_qa.json


Unnamed: 0,month,cohort,Customers,Orders,Spend,Avg_Spend,Orders_per_Active,Orders_per_Cohort,N_cohort,cohort_month,spend_normalized,orders_normalized,active_prop,acq,cohort_time_idx,cohort_idx,time_idx,is_post,dv_log
0,2017-02-01,2017-01-01,0.01991,0.0253,0.8495,33.58,1.271,0.9895,0.02556,1,33.23,0.9895,0.7787,0.0,0,0,0,0,3.503453
1,2017-03-01,2017-01-01,0.02013,0.03371,1.206,35.76,1.675,1.319,0.02556,2,47.17,1.319,0.7876,0.0,1,0,1,0,3.853758
2,2017-04-01,2017-01-01,0.01915,0.02428,0.8342,34.36,1.268,0.9497,0.02556,3,32.63,0.9497,0.749,0.0,2,0,2,0,3.485232
3,2017-05-01,2017-01-01,0.01831,0.02225,0.761,34.21,1.215,0.8703,0.02556,4,29.77,0.8703,0.7163,0.0,3,0,3,0,3.393501
4,2017-06-01,2017-01-01,0.01773,0.02203,0.749,33.99,1.242,0.8619,0.02556,5,29.3,0.8619,0.6937,0.0,4,0,4,0,3.377588


In [4]:
# Cohort-level plots for every listed category, written as TWO separate
# figures per category (raw scale and log scale).
# A failure for one category is reported as a warning and the loop continues.
all_plot_outputs = []
results_root = Path('../results/application')

for cat, pth in entries:
    try:
        # Only the preprocessed panel is needed here; the other two return
        # values are discarded.
        panel_df = prepare_real_data_monthly(
            data_path=pth,
            dv=dv,
            start_date=start_date,
            end_date=end_date,
            covid_onset=covid_onset,
        )[0]

        plot_paths = save_realdata_cohort_panel_plots_separate(
            df=panel_df,
            output_dir=results_root / cat / 'cohort',
            category_name=cat,
            dv_col=dv,
            covid_onset=covid_onset,
            include_pdf=True,
        )

        # One record per category: the category label followed by the
        # returned path entries.
        record = {'category': cat}
        record.update(plot_paths)
        all_plot_outputs.append(record)

        print(f'Saved cohort plots for {cat}:')
        for key in ('raw_png', 'log_png'):
            print(f" - {plot_paths[key]}")
    except Exception as exc:
        print(f"[WARN] Failed plotting for {cat} ({pth}): {exc}")

pd.DataFrame(all_plot_outputs)


Figure saved ->../results/application/gym_instore/cohort/cohort_panel_raw.png
Figure saved ->../results/application/gym_instore/cohort/cohort_panel_log.png
Figure saved ->../results/application/gym_instore/cohort/cohort_panel_raw.pdf
Figure saved ->../results/application/gym_instore/cohort/cohort_panel_log.pdf
Saved cohort plots for gym_instore:
 -../results/application/gym_instore/cohort/cohort_panel_raw.png
 -../results/application/gym_instore/cohort/cohort_panel_log.png
Figure saved ->../results/application/mealkit/cohort/cohort_panel_raw.png
Figure saved ->../results/application/mealkit/cohort/cohort_panel_log.png
Figure saved ->../results/application/mealkit/cohort/cohort_panel_raw.pdf
Figure saved ->../results/application/mealkit/cohort/cohort_panel_log.pdf
Saved cohort plots for mealkit:
 -../results/application/mealkit/cohort/cohort_panel_raw.png
 -../results/application/mealkit/cohort/cohort_panel_log.png
Figure saved ->../results/application/streaming/cohort/cohort_panel_raw.

Unnamed: 0,category,raw_png,log_png,raw_pdf,log_pdf
0,gym_instore,../results/application/gym_instore/cohort/coho...,../results/application/gym_instore/cohort/coho...,../results/application/gym_instore/cohort/coho...,../results/application/gym_instore/cohort/coho...
1,mealkit,../results/application/mealkit/cohort/cohort_p...,../results/application/mealkit/cohort/cohort_p...,../results/application/mealkit/cohort/cohort_p...,../results/application/mealkit/cohort/cohort_p...
2,streaming,../results/application/streaming/cohort/cohort...,../results/application/streaming/cohort/cohort...,../results/application/streaming/cohort/cohort...,../results/application/streaming/cohort/cohort...


## Task A: Model Fitting
- Step: Fit models and generate fitting/extrapolation diagnostics per category.
- Save: Metrics tables and figures for model comparison.


In [5]:
# Task A loop: run model fitting once per category and save outputs.
#
# For each (category, path) entry this cell:
#   1. rebuilds the preprocessed panel and a boolean cohort x period support mask,
#   2. fits FE+AR, GP-CP and GP-CP-Extended on the `dv_log` column,
#   3. records in-sample fit metrics and interval widths for each model,
#   4. writes CSV / JSON / PNG artifacts under results/application/<category>/.
# Aggregated summaries across categories are written at the end.
task_a_summary_rows = []
task_a_model_metrics_all = []

for cat, pth in entries:
    print('=' * 88)
    print(f'[Task A] Category: {cat}')

    category_root = Path('../results/application') / cat
    fit_dir = category_root / 'fit'
    tables_dir = category_root / 'tables'
    metadata_dir = category_root / 'metadata'
    for d in [fit_dir, tables_dir, metadata_dir]:
        d.mkdir(parents=True, exist_ok=True)

    df_cat, stan_cat, step_counts_cat = prepare_real_data_monthly(
        data_path=pth,
        dv=dv,
        start_date=start_date,
        end_date=end_date,
        covid_onset=covid_onset,
    )

    y_np = df_cat['dv_log'].to_numpy(dtype=float)
    obs_c = df_cat['cohort_idx'].to_numpy(dtype=int)
    obs_t = df_cat['time_idx'].to_numpy(dtype=int)
    # nunique must be called; int() on the bound method raises TypeError.
    n_c = int(df_cat['cohort_idx'].nunique())
    n_t = int(df_cat['time_idx'].nunique())
    # True where a (cohort, period) cell is observed.
    # NOTE(review): assumes both index columns are contiguous 0..n-1 — confirm in the loader.
    support_mask_cat = np.zeros((n_c, n_t), dtype=bool)
    support_mask_cat[obs_c, obs_t] = True

    models = {
        'FE+AR': FixedEffectsModel(use_global_mean=True),
        'GP-CP': CohortPeriodModel(use_global_mean=True),
        'GP-CP-Extended': CohortPeriodExtendedModel(use_global_mean=True),
    }

    fit_results = {}
    for model_name, model in models.items():
        model.fit(y=y_np, obs_c=obs_c, obs_t=obs_t, n_cohorts=n_c, n_periods=n_t)
        # NOTE(review): results_dict may be a property or a method on the model
        # classes; both forms are accepted here — confirm against src.models.
        out = model.results_dict
        if callable(out):
            out = out()
        fit_results[model_name] = out

        resid = np.asarray(out['resid'], dtype=float)
        y_hat = np.asarray(out['y_hat'], dtype=float)
        rmse = float(np.sqrt(np.mean((y_np - y_hat) ** 2)))
        mae = float(np.mean(np.abs(y_np - y_hat)))

        # Empirical 95% predictive coverage using residual std:
        # share of observations inside y_hat +/- 1.96 * std(resid).
        resid_std = float(np.std(resid))
        pred_lo = y_hat - 1.96 * resid_std
        pred_hi = y_hat + 1.96 * resid_std
        coverage = float(np.mean((y_np >= pred_lo) & (y_np <= pred_hi)))

        # Interval widths for the alpha / beta effects: 3.92 = 2 * 1.96,
        # i.e. the full width of a +/- 1.96 * std interval.
        std_alpha = np.asarray(out.get('std_alpha', np.array([])), dtype=float)
        std_beta = np.asarray(out.get('std_beta', np.array([])), dtype=float)
        width_alpha = 3.92 * std_alpha if std_alpha.size else np.array([])
        width_beta = 3.92 * std_beta if std_beta.size else np.array([])
        # Pool whichever widths are available; fall back to a single NaN so the
        # pooled statistics below are NaN when a model reports neither.
        width_parts = [w for w in (width_alpha, width_beta) if w.size]
        width_all = np.concatenate(width_parts) if width_parts else np.array([np.nan])

        row = {
            'category': cat,
            'model': model_name,
            'N': int(len(y_np)),
            'K1': int(stan_cat['K1']),
            'K2': int(stan_cat['K2']),
            'K3': int(stan_cat['K3']),
            'T_pre': int(stan_cat['T_pre']),
            'mu': float(out.get('mu', 0.0)),
            'rmse_fit': rmse,
            'mae_fit': mae,
            'coverage': coverage,
            'ci_width_mean': float(np.nanmean(width_all)),
            'ci_width_std': float(np.nanstd(width_all)),
            'ci_width_alpha_mean': float(np.nanmean(width_alpha)) if width_alpha.size else np.nan,
            'ci_width_alpha_std': float(np.nanstd(width_alpha)) if width_alpha.size else np.nan,
            'ci_width_beta_mean': float(np.nanmean(width_beta)) if width_beta.size else np.nan,
            'ci_width_beta_std': float(np.nanstd(width_beta)) if width_beta.size else np.nan,
            'resid_mean': float(np.mean(resid)),
            'resid_std': resid_std,
        }
        task_a_model_metrics_all.append(row)

    fit_support_summary_df = build_support_aware_model_time_summary(
        category=cat,
        fit_results=fit_results,
        support_mask=support_mask_cat,
        model_order=['FE+AR', 'GP-CP', 'GP-CP-Extended'],
        scope='full_fit',
    )
    fit_support_summary_path = tables_dir / 'taskA_fit_support_summary.csv'
    fit_support_summary_df.to_csv(fit_support_summary_path, index=False)

    # Per-category metrics table: the rows collected above for this category.
    metrics_df_cat = pd.DataFrame([r for r in task_a_model_metrics_all if r['category'] == cat])
    metrics_path = tables_dir / 'taskA_model_metrics.csv'
    metrics_df_cat.to_csv(metrics_path, index=False)

    # Save serialized fit outputs (arrays to lists, numpy scalars to float for JSON).
    fit_json = {}
    for model_name, out in fit_results.items():
        fit_json[model_name] = {}
        for k, v in out.items():
            if isinstance(v, np.ndarray):
                # tolist must be called; storing the bound method would be
                # serialized as its repr by default=str below.
                fit_json[model_name][k] = v.tolist()
            elif isinstance(v, (np.floating, np.integer)):
                fit_json[model_name][k] = float(v)
            else:
                fit_json[model_name][k] = v

    fit_json_path = metadata_dir / 'taskA_fit_results.json'
    fit_json_path.write_text(json.dumps(fit_json, indent=2, default=str))

    # First time index on or after COVID onset; None when the panel has no such rows.
    covid_rows = df_cat.loc[df_cat['month'] >= covid_onset, 'time_idx']
    covid_t_idx = int(covid_rows.min()) if len(covid_rows) else None

    # plots (existing overlays); each figure is closed after saving to free memory
    fig_obs = plot_realdata_taska_observed_vs_fitted(
        df=df_cat,
        fit_results=fit_results,
        treatment_time_idx=covid_t_idx,
        treatment_label='COVID onset',
        save_path=str(fit_dir / 'taskA_observed_vs_fitted.png'),
    )
    plt.close(fig_obs)

    fig_beta = plot_realdata_taska_beta_comparison(
        fit_results=fit_results,
        treatment_time_idx=covid_t_idx,
        treatment_label='COVID onset',
        save_path=str(fit_dir / 'taskA_beta_comparison.png'),
    )
    plt.close(fig_beta)

    fig_alpha = plot_realdata_taska_alpha_comparison(
        fit_results=fit_results,
        save_path=str(fit_dir / 'taskA_alpha_comparison.png'),
    )
    plt.close(fig_alpha)

    # Additional fixed-y-axis small-multiples (3 subplots, one per model)
    fig_alpha_sm = plot_realdata_taska_effects_small_multiples_fixed_ylim(
        fit_results=fit_results,
        effect_key='alpha',
        save_path=str(fit_dir / 'taskA_alpha_small_multiples_fixed_ylim.png'),
    )
    plt.close(fig_alpha_sm)

    fig_beta_sm = plot_realdata_taska_effects_small_multiples_fixed_ylim(
        fit_results=fit_results,
        effect_key='beta',
        treatment_time_idx=covid_t_idx,
        treatment_label='COVID onset',
        save_path=str(fit_dir / 'taskA_beta_small_multiples_fixed_ylim.png'),
    )
    plt.close(fig_beta_sm)

    fig_trends = plot_realdata_cohort_y_trends_by_model(
        fit_results=fit_results,
        support_mask=support_mask_cat,
        treatment_time_idx=covid_t_idx,
        treatment_label='COVID onset',
        save_path=str(fit_dir / 'taskA_cohort_trends_by_model.png'),
    )
    plt.close(fig_trends)

    fig_resid = plot_residual_overlay(
        residuals_by_model={m: np.asarray(fit_results[m]['resid'], dtype=float) for m in fit_results},
        title='Task A residual overlay by model',
        save_path=str(fit_dir / 'taskA_residual_overlay.png'),
    )
    plt.close(fig_resid)

    gamma_ext = np.asarray(fit_results['GP-CP-Extended']['gamma'], dtype=float)
    fig_gamma = plot_realdata_taska_gamma_heatmap(
        gamma=gamma_ext,
        save_path=str(fit_dir / 'taskA_gamma_heatmap.png'),
    )
    plt.close(fig_gamma)

    qa_cat = make_real_data_qa(df_cat, stan_cat, step_counts_cat)
    task_a_meta = {
        'category': cat,
        'data_path': str(pth),
        'dv': dv,
        'time_level': time_level,
        # Timestamp.date is a method; call it so the metadata stores dates,
        # not bound-method reprs.
        'date_window': {'start': str(start_date.date()), 'end': str(end_date.date())},
        'covid_onset': str(covid_onset.date()),
        'models': list(models.keys()),
        'qa': qa_cat,
        'artifacts': {
            'metrics_csv': str(metrics_path),
            'fit_support_summary_csv': str(fit_support_summary_path),
            'fit_results_json': str(fit_json_path),
            'plots': [
                str(fit_dir / 'taskA_observed_vs_fitted.png'),
                str(fit_dir / 'taskA_beta_comparison.png'),
                str(fit_dir / 'taskA_alpha_comparison.png'),
                str(fit_dir / 'taskA_alpha_small_multiples_fixed_ylim.png'),
                str(fit_dir / 'taskA_beta_small_multiples_fixed_ylim.png'),
                str(fit_dir / 'taskA_cohort_trends_by_model.png'),
                str(fit_dir / 'taskA_residual_overlay.png'),
                str(fit_dir / 'taskA_gamma_heatmap.png'),
            ],
        },
    }
    meta_path = metadata_dir / 'taskA_metadata.json'
    meta_path.write_text(json.dumps(task_a_meta, indent=2, default=str))

    # Best model for this category = lowest in-sample RMSE.
    best_rmse_row = metrics_df_cat.sort_values('rmse_fit').iloc[0].to_dict()
    task_a_summary_rows.append({
        'category': cat,
        'best_model_by_rmse': best_rmse_row['model'],
        'best_rmse_fit': float(best_rmse_row['rmse_fit']),
        'metrics_csv': str(metrics_path),
    })

    print(f'[Task A] Saved outputs for {cat}:')
    print(f' - {metrics_path}')
    print(f' - {fit_json_path}')
    print(f' - {fit_support_summary_path}')
    print(f' - {meta_path}')

task_a_summary_df = pd.DataFrame(task_a_summary_rows)
task_a_all_metrics_df = pd.DataFrame(task_a_model_metrics_all)

summary_dir = Path('../results/application/summary')
summary_dir.mkdir(parents=True, exist_ok=True)
task_a_summary_df.to_csv(summary_dir / 'taskA_category_summary.csv', index=False)
task_a_all_metrics_df.to_csv(summary_dir / 'taskA_model_metrics_all_categories.csv', index=False)

print('\n[Task A] Aggregated summary outputs:')
print(f" - {summary_dir / 'taskA_category_summary.csv'}")
print(f" - {summary_dir / 'taskA_model_metrics_all_categories.csv'}")

task_a_summary_df


[Task A] Category: gym_instore
Figure saved ->../results/application/gym_instore/fit/taskA_observed_vs_fitted.png
Figure saved ->../results/application/gym_instore/fit/taskA_beta_comparison.png
Figure saved ->../results/application/gym_instore/fit/taskA_alpha_comparison.png
Figure saved ->../results/application/gym_instore/fit/taskA_alpha_small_multiples_fixed_ylim.png
Figure saved ->../results/application/gym_instore/fit/taskA_beta_small_multiples_fixed_ylim.png
Figure saved ->../results/application/gym_instore/fit/taskA_cohort_trends_by_model.png
Figure saved ->../results/application/gym_instore/fit/taskA_residual_overlay.png
Figure saved ->../results/application/gym_instore/fit/taskA_gamma_heatmap.png
[Task A] Saved outputs for gym_instore:
 -../results/application/gym_instore/tables/taskA_model_metrics.csv
 -../results/application/gym_instore/metadata/taskA_fit_results.json
 -../results/application/gym_instore/tables/taskA_fit_support_summary.csv
 -../results/application/gym_instor

Unnamed: 0,category,best_model_by_rmse,best_rmse_fit,metrics_csv
0,gym_instore,GP-CP-Extended,0.053565,../results/application/gym_instore/tables/task...
1,mealkit,GP-CP-Extended,0.036293,../results/application/mealkit/tables/taskA_mo...
2,streaming,GP-CP-Extended,0.03011,../results/application/streaming/tables/taskA_...


## Task B: Placebo Test
- Step: Run placebo workflow with zero-effect truth on post periods.
- Save: Placebo summary metrics and diagnostic outputs by category.


In [6]:
# Task B: re-import/reload modules without restarting kernel.
# Each listed module is imported (if needed) and then reloaded so that edits to
# the project sources are picked up in the running kernel.
import importlib

# Order matters: the individual submodules come first and the `src.data`,
# `src.models` and `src.visualization` packages last, so the packages are
# reloaded after the submodules listed above them.
# NOTE(review): 'src.models.run_placebo_test' is presumably a module sharing
# its name with the function imported below — confirm it exists as a module.
_module_names = [
 'src.data.loaders',
 'src.models.fixed_effects',
 'src.models.cohort_period',
 'src.models.cohort_period_extended',
 'src.evaluation.placebo_effects',
 'src.visualization.plots',
 'src.models.run_placebo_test',
 'src.data',
 'src.models',
 'src.visualization',
]

for _name in _module_names:
 _mod = importlib.import_module(_name)
 importlib.reload(_mod)

# Re-bind the notebook-level names after the reloads above; names imported
# before a reload keep pointing at the old objects.
from src.data import parse_category_path_spec, prepare_real_data_monthly, make_real_data_qa
from src.models import FixedEffectsModel, CohortPeriodModel, CohortPeriodExtendedModel, run_placebo_test
from src.visualization import *

print('Task B re-import/reload complete.')


Task B re-import/reload complete.


In [7]:
# Task B loop: placebo test once per category and save outputs/plots.
#
# For each (category, path) entry the preprocessed panel is rebuilt and passed
# to run_placebo_test, which is asked to save its artifacts and plots under
# results/application. The per-category summary frames are then stacked into
# two aggregated CSVs under results/application/summary.
placebo_summary_rows = []
placebo_split_metric_rows = []

for cat, pth in entries:
    print('=' * 88)
    print(f'[Task B] Category: {cat}')

    df_cat, _, _ = prepare_real_data_monthly(
        data_path=pth,
        dv=dv,
        start_date=start_date,
        end_date=end_date,
        covid_onset=covid_onset,
    )

    out_b = run_placebo_test(
        cohort_df=df_cat,
        category=cat,
        output_root=Path('../results/application'),
        covid_onset=covid_onset,
        placebo_years_back=1,
        month_col='month',
        cohort_col='cohort',
        dv_log_col='dv_log',
        save_artifacts=True,
        save_plots=True,
    )

    # copy() must be called: it returns a new frame, so tagging the category
    # below does not modify the frames held in out_b.
    te_summary_cat = out_b['te_summary_df'].copy()
    split_metrics_cat = out_b['split_metrics_df'].copy()
    te_summary_cat['category'] = cat
    split_metrics_cat['category'] = cat

    placebo_summary_rows.append(te_summary_cat)
    placebo_split_metric_rows.append(split_metrics_cat)

    print(f"[Task B] Saved outputs for {cat}:")
    for k, v in out_b['written_artifacts'].items():
        print(f' - {k}: {v}')

summary_dir = Path('../results/application/summary')
summary_dir.mkdir(parents=True, exist_ok=True)

# Fall back to an empty DataFrame *instance* when there were no categories;
# the bare class has no usable to_csv for the writes below.
if placebo_summary_rows:
    placebo_summary_all = pd.concat(placebo_summary_rows, ignore_index=True)
else:
    placebo_summary_all = pd.DataFrame()

if placebo_split_metric_rows:
    placebo_split_metrics_all = pd.concat(placebo_split_metric_rows, ignore_index=True)
else:
    placebo_split_metrics_all = pd.DataFrame()

path_summary = summary_dir / 'taskB_placebo_te_summary_all_categories.csv'
path_split = summary_dir / 'taskB_placebo_split_metrics_all_categories.csv'
placebo_summary_all.to_csv(path_summary, index=False)
placebo_split_metrics_all.to_csv(path_split, index=False)

print('[Task B] Aggregated summary outputs:')
print(f' - {path_summary}')
print(f' - {path_split}')

placebo_summary_all


[Task B] Category: gym_instore
Figure saved ->../results/application/gym_instore/placebo/tau_t_by_model_lines_ci.png
Figure saved ->../results/application/gym_instore/placebo/tau_extended_cohort_lines_ci.png
Figure saved ->../results/application/gym_instore/placebo/placebo_cohort_trends_by_model.png
Figure saved ->../results/application/gym_instore/placebo/placebo_beta_diagnostic_3panel.png
Figure saved ->../results/application/gym_instore/placebo/placebo_residual_overlay.png
[Task B] Saved outputs for gym_instore:
 - tau_t_csv:../results/application/gym_instore/placebo/tau_t_table.csv
 - tau_ct_csv:../results/application/gym_instore/placebo/tau_ct_table.csv
 - te_summary_csv:../results/application/gym_instore/placebo/te_post_summary_table.csv
 - fit_support_summary_train_csv:../results/application/gym_instore/placebo/fit_support_summary_train.csv
 - fit_support_summary_full_csv:../results/application/gym_instore/placebo/fit_support_summary_full.csv
 - split_metrics_csv:../results/appl

Unnamed: 0,category,model,tau_mae_avg_over_cohorts,tau_rmse_avg_over_cohorts,tau_coverage_avg_over_cohorts,tau_ci_width_avg_over_cohorts,tau_ci_width_std_over_cohorts,att_avg_over_t,att_mae_vs_zero,att_rmse_vs_zero,att_coverage_vs_zero,att_ci_width_avg_over_t
0,gym_instore,FE+AR,0.069581,0.093073,0.924168,0.424536,0.299167,-0.037535,0.037535,0.037535,1.0,0.157078
1,gym_instore,GP-CP,0.161614,0.169609,0.817457,0.467618,0.27533,-0.408201,0.408201,0.408201,0.0,0.164757
2,gym_instore,GP-CP-Extended,0.071766,0.082775,0.971933,0.69459,0.206896,-0.170699,0.170699,0.170699,0.0,0.16543
3,mealkit,FE+AR,0.153565,0.199613,0.804058,0.56598,0.238664,-0.011607,0.011607,0.011607,1.0,0.148508
4,mealkit,GP-CP,0.286333,0.320566,0.772048,0.736965,0.520128,-0.66935,0.66935,0.66935,0.0,0.226961
5,mealkit,GP-CP-Extended,0.439651,0.536202,0.884615,1.927345,0.994223,-1.195559,1.195559,1.195559,0.0,0.562622
6,streaming,FE+AR,0.096636,0.104383,0.761449,0.22784,0.178324,0.216823,0.216823,0.216823,0.0,0.090755
7,streaming,GP-CP,0.161343,0.165288,0.686029,0.303616,0.201624,0.463019,0.463019,0.463019,0.0,0.114511
8,streaming,GP-CP-Extended,0.15337,0.155414,0.689189,0.355184,0.178982,0.461491,0.461491,0.461491,0.0,0.117472
