In [1]:
from pathlib import Path

# Project layout: every path is derived from ~/thesis.
BASE = Path.home().joinpath("thesis")
DATA = BASE.joinpath("data", "processed")  # input files are read from here
OUT = BASE.joinpath("outputs")
OUT_TABLES = OUT.joinpath("tables")        # CSV tables
OUT_TEX = OUT.joinpath("tex")              # LaTeX snippets

# Create the output folders up front; exist_ok makes re-running the cell safe.
for out_dir in (OUT_TABLES, OUT_TEX):
    out_dir.mkdir(parents=True, exist_ok=True)

print("DATA:", DATA)
print("OUT :", OUT)


DATA: /home/david/thesis/data/processed
OUT : /home/david/thesis/outputs


In [2]:
import pandas as pd
import numpy as np

# Summary statistics of the T90 column of grb_gbm_clean.csv.
#
# Keeps only finite, positive T90 values, counts how many fall below / at or
# above the short-long split, and saves the result twice: as a one-row CSV
# table and as a set of LaTeX macros.

# Threshold (seconds) separating "short" from "long" entries. The CSV column
# names below ("..._lt_2s", "..._ge_2s") encode this value, so rename them if
# the threshold is ever changed.
T90_SPLIT_S = 2.0

grb = pd.read_csv(DATA/"grb/grb_gbm_clean.csv", low_memory=False)

# errors="coerce" turns unparseable entries into NaN; the mask then drops
# NaN, +/-inf and non-positive values in one step.
t90 = pd.to_numeric(grb["T90"], errors="coerce")
t90 = t90[(t90 > 0) & np.isfinite(t90)]

N_total = len(t90)
if N_total == 0:
    # Fail with a clear message instead of the ZeroDivisionError the
    # fraction below would otherwise raise on an empty sample.
    raise ValueError(
        "No finite, positive T90 values found in grb_gbm_clean.csv; "
        "cannot compute summary statistics."
    )

N_short = int((t90 < T90_SPLIT_S).sum())
N_long  = int((t90 >= T90_SPLIT_S).sum())
# Every retained value is finite, so the two classes must partition the sample.
assert N_short + N_long == N_total, "short/long counts do not add up to N_total"
frac_short = N_short / N_total

stats = {
    "N_total": N_total,
    "N_short_T90_lt_2s": N_short,
    "N_long_T90_ge_2s": N_long,
    "short_fraction": frac_short,
    "T90_median_s": float(np.median(t90)),
    # 16th / 84th percentiles of the retained T90 values.
    "T90_16pct_s": float(np.percentile(t90, 16)),
    "T90_84pct_s": float(np.percentile(t90, 84)),
}

# One-row table: one column per statistic.
summary = pd.DataFrame([stats])
summary.to_csv(OUT_TABLES/"grb_summary_stats.csv", index=False)

# Write a tiny LaTeX snippet you can \input{} in Overleaf.
# Doubled braces are literal braces in the f-string; the innermost pair
# interpolates the value.
tex = (
    f"\\newcommand{{\\GRBTotal}}{{{N_total}}}\n"
    f"\\newcommand{{\\GRBShort}}{{{N_short}}}\n"
    f"\\newcommand{{\\GRBLong}}{{{N_long}}}\n"
    f"\\newcommand{{\\GRBShortFrac}}{{{frac_short:.3f}}}\n"
)
# Explicit encoding so the file does not depend on the machine's locale.
(OUT_TEX/"grb_numbers.tex").write_text(tex, encoding="utf-8")

print("Saved:", OUT_TABLES/"grb_summary_stats.csv")
print("Saved:", OUT_TEX/"grb_numbers.tex")
summary


Saved: /home/david/thesis/outputs/tables/grb_summary_stats.csv
Saved: /home/david/thesis/outputs/tex/grb_numbers.tex


Unnamed: 0,N_total,N_short_T90_lt_2s,N_long_T90_ge_2s,short_fraction,T90_median_s,T90_16pct_s,T90_84pct_s
0,4208,703,3505,0.167063,18.688,1.792,64.001
