In [2]:
import numpy as np
import pandas as pd

# ---- load your cleaned GBM catalog ----
path = "/home/david/thesis/data/processed/grb/grb_gbm_clean.csv"

# Exact column names tried first, in priority order, before falling back to a
# case-insensitive substring search for "t90".
PREFERRED_T90_COLUMNS = (
    "T90", "t90", "t90_s", "T90_s", "T90 (s)", "t90_sec", "T90_sec",
)

# Threshold used to split the sample: T90 below this value is counted as
# "short", everything else as "long".
SHORT_T90_THRESHOLD_S = 2.0


def find_t90_column(df, preferred=PREFERRED_T90_COLUMNS):
    """Return the label of the column in *df* that holds T90 values.

    Exact matches from *preferred* win, in order. Otherwise every column whose
    label contains "t90" (case-insensitive) is a candidate, and the first
    candidate with at least one value parseable as a number is returned.

    Raises:
        KeyError: if no column label contains "t90", or if none of the
            matching columns holds any numeric value.
    """
    for name in preferred:
        if name in df.columns:
            return name

    # str() so that non-string labels (e.g. integer column names) cannot
    # raise AttributeError here and hide the intended KeyError below.
    candidates = [c for c in df.columns if "t90" in str(c).lower()]
    if not candidates:
        # Comma-separated on purpose: KeyError displays the repr of its
        # argument, so embedded newlines would show up as literal "\n".
        raise KeyError(
            "No T90 column found. Columns are: "
            + ", ".join(map(str, df.columns))
        )

    # Apply the numeric check even when there is a single candidate, so a
    # lone text column such as "t90_flag" is rejected instead of silently
    # producing an empty sample.
    numeric = [
        c
        for c in candidates
        if pd.to_numeric(df[c], errors="coerce").notna().any()
    ]
    if not numeric:
        raise KeyError(f"Found T90-like cols {candidates} but none numeric.")
    return numeric[0]


def clean_t90(values):
    """Return the usable T90 values from *values* as a float ndarray.

    Entries that cannot be parsed as numbers, are missing, are not finite,
    or are not strictly positive are discarded.
    """
    parsed = pd.to_numeric(pd.Series(values), errors="coerce").dropna()
    arr = parsed.to_numpy(dtype=float)
    # dropna() keeps +/-inf, hence the explicit finiteness test.
    return arr[np.isfinite(arr) & (arr > 0)]


def summarize_t90(t90, short_threshold=SHORT_T90_THRESHOLD_S):
    """Compute the Table 1 statistics for an array of cleaned T90 values.

    Returns a dict with keys ``N``, ``short``, ``long``, ``short_frac``,
    ``median``, ``p16`` and ``p84``. For an empty input the counts are 0 and
    every other entry is NaN.
    """
    t90 = np.asarray(t90, dtype=float)
    n = int(t90.size)
    if n == 0:
        return {
            "N": 0,
            "short": 0,
            "long": 0,
            "short_frac": np.nan,
            "median": np.nan,
            "p16": np.nan,
            "p84": np.nan,
        }

    n_short = int(np.count_nonzero(t90 < short_threshold))
    n_long = int(np.count_nonzero(t90 >= short_threshold))
    p16, p84 = np.percentile(t90, [16, 84])
    return {
        "N": n,
        "short": n_short,
        "long": n_long,
        "short_frac": n_short / n,
        "median": float(np.median(t90)),
        "p16": float(p16),
        "p84": float(p84),
    }


# In a notebook cell or when run as a script, __name__ is "__main__", so the
# section below executes and defines the same top-level names as before
# (df, t90_col, t90, N, short, long, ...). The guard only keeps the helpers
# importable without touching the file system.
if __name__ == "__main__":
    df = pd.read_csv(path)

    print("Loaded:", path)
    print("Shape:", df.shape)

    # ---- find T90 column robustly ----
    t90_col = find_t90_column(df)

    # ---- clean T90 values ----
    t90 = clean_t90(df[t90_col])

    # ---- compute Table 1 numbers ----
    stats = summarize_t90(t90)
    N = stats["N"]
    short = stats["short"]
    long = stats["long"]
    short_frac = stats["short_frac"]
    median = stats["median"]
    p16, p84 = stats["p16"], stats["p84"]
    n_dropped = len(df) - N

    # ---- print results in the table style ----
    print("\n=== Table 1 recomputation ===")
    print(f"T90 column used: {t90_col}")
    print(f"Total GRBs (N)          {N}")
    print(f"Short GRBs (T90 < {SHORT_T90_THRESHOLD_S:g} s)  {short}")
    print(f"Long GRBs (T90 ≥ {SHORT_T90_THRESHOLD_S:g} s)   {long}")
    print(f"Short fraction          {short_frac:.3f}")
    print(f"Median T90 (s)          {median:.3f}")
    print(f"T90 16–84% range (s)    {p16:.3f}–{p84:.3f}")
    if n_dropped:
        # N counts only usable T90 values; make any shortfall against the
        # catalog size visible instead of leaving it implicit.
        print(
            f"Note: excluded {n_dropped} of {len(df)} rows with missing, "
            "non-finite, or non-positive T90"
        )

Loaded: /home/david/thesis/data/processed/grb/grb_gbm_clean.csv
Shape: (4208, 11)

=== Table 1 recomputation ===
T90 column used: T90
Total GRBs (N)          4208
Short GRBs (T90 < 2 s)  703
Long GRBs (T90 ≥ 2 s)   3505
Short fraction          0.167
Median T90 (s)          18.688
T90 16–84% range (s)    1.792–64.001
