In [1]:
from pathlib import Path
base = Path.home() / "thesis"

hits = []
for p in base.rglob("*"):
    if p.is_file() and any(k in p.name.lower() for k in ["gbm", "grb", "t90", "fermi"]):
        hits.append(p)

for p in sorted(hits)[:50]:
    print(p)
print("Total hits:", len(hits))


/home/david/thesis/data/processed/grb/grb_gbm_clean.csv
/home/david/thesis/data/raw/grb/fermigbrst.csv
/home/david/thesis/figures/Fig01_GRB_T90_hist.png
/home/david/thesis/figures/Fig02_GRB_fluence_vs_T90.png
/home/david/thesis/notebooks/.ipynb_checkpoints/10_grb_plots-checkpoint.ipynb
/home/david/thesis/notebooks/10_grb_plots.ipynb
/home/david/thesis/outputs/tables/grb_summary_stats.csv
/home/david/thesis/outputs/tex/grb_numbers.tex
Total hits: 8


In [2]:
import sys
print(sys.executable)


/home/david/envs/thesis/bin/python


In [3]:
import pandas as pd
import numpy as np

path = "/home/david/thesis/data/processed/grb/grb_gbm_clean.csv"
df_grb = pd.read_csv(path)

print(df_grb.shape)
print(df_grb.columns)


(4208, 11)
Index(['NAME', 'RA', 'DEC', 'TRIGGER_TIME', 'T90', 'T90_ERROR', 'FLUENCE',
       'FLUENCE_ERROR', 'T50', 'T50_ERROR', 'TRIGGER_NAME'],
      dtype='str')


In [4]:
[c for c in df_grb.columns if "t90" in c.lower()]


['T90', 'T90_ERROR']

In [5]:
T90_COL = "T90"  # <-- change if needed

t90 = pd.to_numeric(df_grb[T90_COL], errors="coerce").to_numpy(float)
t90 = t90[np.isfinite(t90) & (t90 > 0)]

N = len(t90)
f1 = (t90 <= 1.0).mean()
f3 = (t90 <= 3.0).mean()

print("N =", N)
print("short fraction (T90<=1s) =", f1)
print("short fraction (T90<=3s) =", f3)


N = 4208
short fraction (T90<=1s) = 0.12357414448669202
short fraction (T90<=3s) = 0.19462927756653992


In [6]:
from pathlib import Path
base = Path.home() / "thesis"
candidates = list((base/"data"/"processed").rglob("*agn*clean*.csv")) + \
             list((base/"data"/"processed").rglob("*4lac*clean*.csv")) + \
             list((base/"data"/"processed").rglob("*agn*.csv"))
print("\n".join(map(str, candidates[:50])))
print("Total:", len(candidates))


/home/david/thesis/data/processed/agn/agn_4lac_clean.csv
/home/david/thesis/data/processed/agn/agn_4lac_clean.csv
/home/david/thesis/data/processed/agn/agn_4lac_clean.csv
Total: 3


In [7]:
from pathlib import Path

base = Path.home()/"thesis"/"data"/"processed"
hits = []
for pat in ["*agn*.csv", "*4lac*.csv", "*4fgl*.csv", "*lat*.csv", "*blazar*.csv"]:
    hits += list(base.rglob(pat))

print("Found:", len(hits))
for p in sorted(hits)[:200]:
    print(p)


Found: 2
/home/david/thesis/data/processed/agn/agn_4lac_clean.csv
/home/david/thesis/data/processed/agn/agn_4lac_clean.csv


In [8]:
path = "/home/david/thesis/data/processed/agn/4lac_clean.csv"


In [9]:
import pandas as pd

path = "/home/david/thesis/data/processed/agn/agn_4lac_clean.csv"
df = pd.read_csv(path)

print(df.shape)
print(df.columns)
df.head()


(3407, 18)
Index(['CLASS', 'DEC_Counterpart', 'Energy_Flux100', 'Flux1000',
       'Frac_Variability', 'LP_Index', 'PL_Index', 'RAJ2000', 'RA_Counterpart',
       'Redshift', 'SED_class', 'Source_Name', 'Unc_Energy_Flux100',
       'Unc_Flux1000', 'Unc_Frac_Variability', 'Unc_LP_Index', 'Unc_PL_Index',
       'Variability_Index'],
      dtype='str')


Unnamed: 0,CLASS,DEC_Counterpart,Energy_Flux100,Flux1000,Frac_Variability,LP_Index,PL_Index,RAJ2000,RA_Counterpart,Redshift,SED_class,Source_Name,Unc_Energy_Flux100,Unc_Flux1000,Unc_Frac_Variability,Unc_LP_Index,Unc_PL_Index,Variability_Index
0,b'bcu',47.700201,1.499454e-12,1.259796e-10,0.675882,2.254081,2.271696,0.3126,0.329341,-inf,b'ISP',b'4FGL J0001.2+4741',3.968492e-13,2.877197e-11,0.31285,0.226351,0.179057,25.313953
1,b'bll',-7.774145,8.339171e-12,7.471219e-10,0.406565,2.078927,2.116692,0.3151,0.325104,-inf,b'LSP',b'4FGL J0001.2-0747',5.499454e-13,5.041939e-11,0.11187,0.065974,0.049792,46.780693
2,b'bll',-0.19442,1.231385e-12,1.082246e-10,0.0,1.661223,1.93916,0.3717,0.33954,0.461516,b'LSP',b'4FGL J0001.4-0010',3.138318e-13,2.914439e-11,10.0,0.673303,0.192644,9.272764
3,b'fsrq',21.226743,2.555889e-11,1.347354e-09,0.996138,2.514159,2.65406,0.3815,0.384877,1.106,b'ISP',b'4FGL J0001.5+2113',6.652318e-13,6.172809e-11,0.213478,0.034551,0.020943,1910.9358
4,b'bcu',-41.923705,3.560476e-12,2.821817e-10,0.490977,1.693865,1.775176,0.4165,0.38648,-inf,b'HSP',b'4FGL J0001.6-4156',4.168776e-13,3.090882e-11,0.175782,0.107375,0.07332,26.393343


In [10]:
from pathlib import Path
p = Path("/home/david/thesis/data/processed/agn/agn_4lac_clean.csv")
print(p.exists(), p)


True /home/david/thesis/data/processed/agn/agn_4lac_clean.csv


In [11]:
import numpy as np
import pandas as pd
from pathlib import Path

path = "/home/david/thesis/data/processed/agn/agn_4lac_clean.csv"
df = pd.read_csv(path)

# Choose which parameters you want medians for:
cols = ["LP_Index", "PL_Index", "Frac_Variability", "Variability_Index", "Energy_Flux100", "Flux1000", "Redshift"]

# Strict: require value + uncertainty where available
strict_require = [
    "LP_Index", "Unc_LP_Index",
    "PL_Index", "Unc_PL_Index",
    "Frac_Variability", "Unc_Frac_Variability",
    "Energy_Flux100", "Unc_Energy_Flux100",
    "Flux1000", "Unc_Flux1000",
    "Variability_Index"
]

# Redshift often missing; treat separately
base = df.copy()

# Convert to numeric safely (some columns may be strings)
for c in set(cols + strict_require):
    if c in base.columns:
        base[c] = pd.to_numeric(base[c], errors="coerce")

strict = base.dropna(subset=[c for c in strict_require if c in base.columns]).copy()

print("Strict sample size:", len(strict))


Strict sample size: 3407


In [12]:
def subset_by_class(df, key):
    # key: "bll" or "fsrq"
    return df[df["CLASS"].astype(str).str.lower().str.contains(key)]

bll  = subset_by_class(strict, "bll")
fsrq = subset_by_class(strict, "fsrq")

def medians(df, cols):
    out = {}
    for c in cols:
        if c in df.columns:
            x = df[c].to_numpy()
            x = x[np.isfinite(x)]
            out[c] = float(np.median(x)) if len(x) else np.nan
    return out

cols_noz = [c for c in cols if c != "Redshift"]   # (handle z separately if you want)
m_all  = medians(strict, cols_noz)
m_bll  = medians(bll, cols_noz)
m_fsrq = medians(fsrq, cols_noz)

print("N_all, N_bll, N_fsrq:", len(strict), len(bll), len(fsrq))
print("Median LP_Index (BLL, FSRQ):", m_bll["LP_Index"], m_fsrq["LP_Index"])


N_all, N_bll, N_fsrq: 3407 1379 755
Median LP_Index (BLL, FSRQ): 1.9357148 2.38066


In [13]:
OUT_TEX = Path.home() / "thesis" / "outputs" / "tex"
OUT_TEX.mkdir(parents=True, exist_ok=True)

def fmt(x, nd=3):
    if x is None or (isinstance(x, float) and (np.isnan(x) or np.isinf(x))):
        return r"\nodata"
    return f"{x:.{nd}f}"

tex = []
tex.append(r"\newcommand{\AGNStrictReady}{1}")
tex.append(f"\\newcommand{{\\AGNStrictNAll}}{{{len(strict)}}}")
tex.append(f"\\newcommand{{\\AGNStrictNBLL}}{{{len(bll)}}}")
tex.append(f"\\newcommand{{\\AGNStrictNFSRQ}}{{{len(fsrq)}}}")

# medians you care about
tex.append(f"\\newcommand{{\\AGNMedLPIndexBLL}}{{{fmt(m_bll.get('LP_Index'))}}}")
tex.append(f"\\newcommand{{\\AGNMedLPIndexFSRQ}}{{{fmt(m_fsrq.get('LP_Index'))}}}")
tex.append(f"\\newcommand{{\\AGNMedPLIndexBLL}}{{{fmt(m_bll.get('PL_Index'))}}}")
tex.append(f"\\newcommand{{\\AGNMedPLIndexFSRQ}}{{{fmt(m_fsrq.get('PL_Index'))}}}")
tex.append(f"\\newcommand{{\\AGNMedFracVarBLL}}{{{fmt(m_bll.get('Frac_Variability'))}}}")
tex.append(f"\\newcommand{{\\AGNMedFracVarFSRQ}}{{{fmt(m_fsrq.get('Frac_Variability'))}}}")

(OUT_TEX / "agn_numbers.tex").write_text("\n".join(tex) + "\n")
print("Wrote:", OUT_TEX / "agn_numbers.tex")


Wrote: /home/david/thesis/outputs/tex/agn_numbers.tex


In [14]:
import numpy as np
import pandas as pd

path = "/home/david/thesis/data/processed/agn/agn_4lac_clean.csv"
df = pd.read_csv(path)

# ensure numeric
for c in ["Frac_Variability", "Unc_Frac_Variability"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

# "strict": require Frac_Variability to exist (and its uncertainty if available)
need = ["Frac_Variability"]
if "Unc_Frac_Variability" in df.columns:
    need.append("Unc_Frac_Variability")

strict = df.dropna(subset=need).copy()

# class filters (your CLASS values look like b'bll', b'fsrq')
cls = strict["CLASS"].astype(str).str.lower()
bll  = strict[cls.str.contains("bll")]
fsrq = strict[cls.str.contains("fsrq")]

def med(x):
    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x)]
    return float(np.median(x)) if len(x) else np.nan

med_bll  = med(bll["Frac_Variability"])
med_fsrq = med(fsrq["Frac_Variability"])

print("Strict N (BLL, FSRQ):", len(bll), len(fsrq))
print("Strict median Frac_Variability (BLL):", med_bll)
print("Strict median Frac_Variability (FSRQ):", med_fsrq)


Strict N (BLL, FSRQ): 1379 755
Strict median Frac_Variability (BLL): 0.27943447
Strict median Frac_Variability (FSRQ): 0.6485875


In [15]:
from pathlib import Path
OUT = Path.home() / "thesis" / "outputs" / "tex"
OUT.mkdir(parents=True, exist_ok=True)

tex = (
  r"\newcommand{\AGNStrictReady}{1}" "\n"
  rf"\newcommand{{\AGNVarMedBLL}}{{{med_bll:.4f}}}" "\n"
  rf"\newcommand{{\AGNVarMedFSRQ}}{{{med_fsrq:.4f}}}" "\n"
)
(OUT / "agn_strict_numbers.tex").write_text(tex)
print("Wrote:", OUT / "agn_strict_numbers.tex")


Wrote: /home/david/thesis/outputs/tex/agn_strict_numbers.tex


In [16]:
import pandas as pd
import numpy as np

path = "/home/david/thesis/data/processed/cr/ams02_proton_daily.csv"
df = pd.read_csv(path)

df["date"] = pd.to_datetime(df["date"], errors="coerce")
df = df.dropna(subset=["date", "flux", "rigidity_min_GV", "rigidity_max_GV"]).sort_values("date")

bins = [(3.29, 3.64), (3.64, 4.02)]  # neighboring bins

for rmin, rmax in bins:
    sel = df[np.isclose(df["rigidity_min_GV"], rmin) & np.isclose(df["rigidity_max_GV"], rmax)].copy()
    N = len(sel)
    if N == 0:
        print(f"{rmin:.2f}–{rmax:.2f} GV: N=0 (no rows found)")
        continue
    mean_flux = float(sel["flux"].mean())
    min_flux  = float(sel["flux"].min())
    max_flux  = float(sel["flux"].max())
    A = (max_flux - min_flux) / mean_flux
    print(f"{rmin:.2f}–{rmax:.2f} GV: N={N}, A={A:.6f}")


3.29–3.64 GV: N=2824, A=0.624235
3.64–4.02 GV: N=2824, A=0.561834


In [17]:
import os, glob
glob.glob("/home/david/thesis/data/processed/cr/*.csv")[:20]


['/home/david/thesis/data/processed/cr/ams02_proton_daily.csv']