In [1]:
from pathlib import Path
import pandas as pd
from astropy.table import Table

BASE = Path.home() / "thesis"
raw = BASE / "data" / "raw" / "agn" / "table-4LAC-DR3-h.fits"
out = BASE / "data" / "processed" / "agn" / "agn_4lac_clean.csv"
out.parent.mkdir(parents=True, exist_ok=True)

t = Table.read(raw)
df = t.to_pandas()

# Keep a robust subset (works even if exact column names vary)
def keep_if_contains(col):
    key = col.lower()
    return any(k in key for k in [
        "source", "name", "class", "ra", "dec",
        "redshift", "z", "pl_index", "photon", "index",
        "variab", "flux", "eflux", "energyflux"
    ])

keep = [c for c in df.columns if keep_if_contains(c)]
df = df[sorted(set(keep))].copy()

df.to_csv(out, index=False)
print("Saved:", out, "rows:", len(df), "cols:", len(df.columns))


Saved: /home/david/thesis/data/processed/agn/agn_4lac_clean.csv rows: 3407 cols: 18


In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

BASE = Path.home() / "thesis"
DATA = BASE / "data" / "processed"
OUT_TABLES = BASE / "outputs" / "tables"
OUT_TEX = BASE / "outputs" / "tex"
OUT_TABLES.mkdir(parents=True, exist_ok=True)
OUT_TEX.mkdir(parents=True, exist_ok=True)

agn = pd.read_csv(DATA/"agn/agn_4lac_clean.csv", low_memory=False)

# Find class column and photon index column automatically
def pick(df, keys):
    for k in keys:
        for c in df.columns:
            if k in c.lower():
                return c
    return None

class_col = pick(agn, ["class"])
idx_col   = pick(agn, ["pl_index", "photon", "spectral", "index", "gamma"])
var_col   = pick(agn, ["variab"])

print("Using:", class_col, idx_col, var_col)

cls = agn[class_col].astype(str).str.lower()
idx = pd.to_numeric(agn[idx_col], errors="coerce")
var = pd.to_numeric(agn[var_col], errors="coerce") if var_col else None

mask_bll  = cls.str.contains("bll")
mask_fsrq = cls.str.contains("fsrq")

bll = idx[mask_bll].dropna()
fsrq = idx[mask_fsrq].dropna()

summary = {
    "N_total_sources": int(len(agn)),
    "N_BLL": int(mask_bll.sum()),
    "N_FSRQ": int(mask_fsrq.sum()),
    "Gamma_median_BLL": float(np.median(bll)) if len(bll) else np.nan,
    "Gamma_median_FSRQ": float(np.median(fsrq)) if len(fsrq) else np.nan,
}

# add variability medians if available
if var_col:
    vb = var[mask_bll].dropna()
    vf = var[mask_fsrq].dropna()
    summary["Variability_median_BLL"] = float(np.median(vb)) if len(vb) else np.nan
    summary["Variability_median_FSRQ"] = float(np.median(vf)) if len(vf) else np.nan

out_df = pd.DataFrame([summary])
out_df.to_csv(OUT_TABLES/"agn_summary_stats.csv", index=False)

tex = (
    f"\\newcommand{{\\AGNTot}}{{{summary['N_total_sources']}}}\n"
    f"\\newcommand{{\\AGNBLL}}{{{summary['N_BLL']}}}\n"
    f"\\newcommand{{\\AGNFSRQ}}{{{summary['N_FSRQ']}}}\n"
)
(OUT_TEX/"agn_numbers.tex").write_text(tex)

print("Saved:", OUT_TABLES/"agn_summary_stats.csv")
print("Saved:", OUT_TEX/"agn_numbers.tex")
out_df


Using: CLASS PL_Index Frac_Variability
Saved: /home/david/thesis/outputs/tables/agn_summary_stats.csv
Saved: /home/david/thesis/outputs/tex/agn_numbers.tex


Unnamed: 0,N_total_sources,N_BLL,N_FSRQ,Gamma_median_BLL,Gamma_median_FSRQ,Variability_median_BLL,Variability_median_FSRQ
0,3407,1379,755,2.023715,2.449434,0.279434,0.648587
