# Meta-analysis starter kit — analysis skeleton (Python)

This notebook loads `extraction_template.csv`, computes a placeholder pooled estimate, and writes a summary table + forest plot to the kit `outputs/` folder, along with a run log.


In [None]:
import json
import os
from datetime import datetime, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Locations used by every later cell.
#
# BASE is the root of the execution outputs. It defaults to the original
# author's directory; set the META_KIT_BASE environment variable to run the
# kit from another location without editing this cell. An empty value is
# treated the same as an unset one.
DEFAULT_BASE = '/Users/jtr/_JTR23_/COSMO/runtime/outputs/execution'
BASE = Path(os.environ.get('META_KIT_BASE') or DEFAULT_BASE)

KIT_DIR = BASE / 'outputs' / 'meta_analysis_starter_kit'
TEMPLATE_PATH = KIT_DIR / 'extraction_template.csv'  # input: one row per study
OUT_DIR = KIT_DIR / 'outputs'                        # summary table, plot, run log

# Create the output folder (and any missing parents) up front so the later
# cells can write into it unconditionally.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Notebook display of the resolved paths.
TEMPLATE_PATH, OUT_DIR


In [None]:
# Read the extraction template into `df`. When the file does not exist yet,
# seed it with a few example rows first so the rest of the notebook stays
# runnable.
required_cols = ['study_id', 'effect', 'se', 'n_treat', 'n_ctrl', 'notes']

if not TEMPLATE_PATH.exists():
    # One tuple per example study, in the same order as `required_cols`.
    example_rows = [
        ('Study_001', 0.10, 0.08, 100, 100, 'example row'),
        ('Study_002', 0.22, 0.10, 80, 75, 'example row'),
        ('Study_003', -0.05, 0.07, 120, 110, 'example row'),
    ]
    df0 = pd.DataFrame(example_rows, columns=required_cols)
    TEMPLATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    df0.to_csv(TEMPLATE_PATH, index=False)

df = pd.read_csv(TEMPLATE_PATH)

# Fail early, naming every absent column, rather than hitting a KeyError
# in a later cell.
present = set(df.columns)
missing = [name for name in required_cols if name not in present]
if missing:
    raise ValueError(f'Missing required columns in template: {missing}')

# Notebook display: preview of the loaded rows.
df.head()


In [None]:
# Placeholder meta-analysis: inverse-variance fixed-effect pooling.
#
# Each usable study i gets weight w_i = 1 / se_i**2. The pooled effect is the
# weighted mean sum(w_i * y_i) / sum(w_i), its standard error is
# sqrt(1 / sum(w_i)), and the 95% interval is pooled +/- 1.96 * se_pooled.
# Writes a one-row summary to OUT_DIR / 'summary_table.csv'.
#
# Raises ValueError when no row has a usable effect / se pair.
dat = df.copy()
dat['effect'] = pd.to_numeric(dat['effect'], errors='coerce')
dat['se'] = pd.to_numeric(dat['se'], errors='coerce')
dat = dat.dropna(subset=['effect', 'se'])

# 'inf' / '-inf' parse as numbers and survive dropna: an infinite effect
# would make the pooled estimate inf/NaN, and an infinite se would give a
# zero weight (0/0 if every weight is zero). Keep finite values only, with a
# strictly positive se.
finite = np.isfinite(dat[['effect', 'se']].to_numpy(dtype=float)).all(axis=1)
dat = dat[finite & (dat['se'] > 0).to_numpy()]

if dat.empty:
    raise ValueError('No usable rows found (need numeric effect and positive se).')

w = 1.0 / (dat['se'].to_numpy() ** 2)
y = dat['effect'].to_numpy()
pooled = float(np.sum(w * y) / np.sum(w))
se_pooled = float(np.sqrt(1.0 / np.sum(w)))
ci_low = pooled - 1.96 * se_pooled
ci_high = pooled + 1.96 * se_pooled

summary = pd.DataFrame([{
    'k': int(dat.shape[0]),  # number of studies that entered the pooling
    'model': 'fixed_effect_inverse_variance',
    'pooled_effect': pooled,
    'pooled_se': se_pooled,
    'ci_low_95': ci_low,
    'ci_high_95': ci_high,
    'template_path': str(TEMPLATE_PATH),
}])

summary_path = OUT_DIR / 'summary_table.csv'
summary.to_csv(summary_path, index=False)

# Notebook display of the summary row.
summary


In [None]:
# Forest plot: one horizontal 95% CI line and point per study, plus a diamond
# for the pooled estimate, saved to OUT_DIR / 'forest_plot.png'.
plot_df = (
    dat.assign(
        ci_low=dat['effect'] - 1.96 * dat['se'],
        ci_high=dat['effect'] + 1.96 * dat['se'],
    )
    .sort_values('effect')
    .reset_index(drop=True)
)

# Rows are sorted by ascending effect; descending y positions put the
# smallest effect at the top of the plot.
n_studies = plot_df.shape[0]
ypos = np.arange(n_studies)[::-1]
fig_h = max(3.0, 0.5 + 0.35 * n_studies)  # grow with study count, minimum 3in
fig, ax = plt.subplots(figsize=(8, fig_h), dpi=150)

# Study-level intervals and point estimates, with a reference line at zero.
ax.hlines(
    y=ypos,
    xmin=plot_df['ci_low'],
    xmax=plot_df['ci_high'],
    color='black',
    linewidth=1,
)
ax.plot(plot_df['effect'], ypos, 'o', color='black', markersize=4)
ax.axvline(0, color='gray', linestyle='--', linewidth=1)

# Pooled estimate as a diamond one row below the last study: the horizontal
# corners sit at the CI bounds and the vertical corners at the point estimate.
y_pool = -1
diamond_x = [ci_low, pooled, ci_high, pooled, ci_low]
diamond_y = [y_pool, y_pool + 0.25, y_pool, y_pool - 0.25, y_pool]
ax.fill(diamond_x, diamond_y, color='black', alpha=0.25)
ax.plot([ci_low, ci_high], [y_pool, y_pool], color='black', linewidth=1)

# Tick labels: study ids in plotted order, then the pooled row.
labels = [*plot_df['study_id'].astype(str).tolist(), 'Pooled']
ax.set_yticks([*ypos, y_pool])
ax.set_yticklabels(labels)
ax.set_xlabel('Effect size')
ax.set_title('Forest plot (placeholder fixed-effect pooling)')

# X range spans every interval (studies and pooled) plus 8% padding; fall
# back to a fixed pad when the range is degenerate.
xmin = float(min(plot_df['ci_low'].min(), ci_low))
xmax = float(max(plot_df['ci_high'].max(), ci_high))
if xmax > xmin:
    pad = 0.08 * (xmax - xmin)
else:
    pad = 0.5
ax.set_xlim(xmin - pad, xmax + pad)
ax.set_ylim(y_pool - 0.75, ypos.max() + 0.75)
plt.tight_layout()

forest_path = OUT_DIR / 'forest_plot.png'
fig.savefig(forest_path, bbox_inches='tight')
plt.close(fig)  # release the figure; the PNG on disk is the output

# Notebook display of the saved file's path.
forest_path


In [None]:
# Save a simple run log (JSON) next to the other outputs: when the run
# happened, which files were read and written, and the headline numbers
# copied from the one-row `summary` table.
#
# datetime.utcnow() is deprecated since Python 3.12 and returns a naive
# datetime, so take an aware UTC time instead and render its '+00:00' offset
# as the 'Z' suffix the log has always used (e.g. 2024-05-01T12:34:56Z).
now_utc = datetime.now(timezone.utc)
timestamp_utc = now_utc.isoformat(timespec='seconds').replace('+00:00', 'Z')

run_log = {
    'timestamp_utc': timestamp_utc,
    'template_path': str(TEMPLATE_PATH),
    'summary_table': str(summary_path),
    'forest_plot': str(forest_path),
    # Cast to built-in int/float: json cannot serialize NumPy scalar types.
    'k': int(summary.loc[0, 'k']),
    'pooled_effect': float(summary.loc[0, 'pooled_effect']),
    'pooled_se': float(summary.loc[0, 'pooled_se']),
    'ci_low_95': float(summary.loc[0, 'ci_low_95']),
    'ci_high_95': float(summary.loc[0, 'ci_high_95']),
}

run_log_path = OUT_DIR / 'run_log.json'
run_log_path.write_text(json.dumps(run_log, indent=2), encoding='utf-8')

# Notebook display of the log file's path.
run_log_path
