In [None]:
import os
# Create the outputs/ directory up front (no error if it already exists)
# so that later cells can write their results into it.
os.makedirs(name="outputs", exist_ok=True)
print("✅ Feature engineering ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 89 (delta 31), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (89/89), 32.86 KiB | 3.65 MiB/s, done.
Resolving deltas: 100% (31/31), done.


In [None]:
# Cell 2: Phase 1 feature build (no TESS) — with ra/dec → float
import os
import pandas as pd
from pandas.errors import EmptyDataError
from astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u

def safe_csv(path, cols):
    """Read the CSV at ``path``, or return an empty frame with columns ``cols``.

    The empty fallback is used in two situations: ``path`` does not exist, or
    pandas raises ``EmptyDataError`` while parsing it. When the file is read
    successfully its own columns are returned as-is; ``cols`` is not applied
    to it. Any other exception from ``pd.read_csv`` propagates to the caller.
    """
    loaded = None
    if os.path.exists(path):
        try:
            loaded = pd.read_csv(path)
        except EmptyDataError:
            # Nothing parseable in the file: treat it like a missing file.
            loaded = None
    if loaded is None:
        loaded = pd.DataFrame(columns=cols)
    return loaded

# 1) Load everything
# Each entry pairs an input file with the columns to use for the empty
# fallback frame that safe_csv returns when the file is missing or blank.
planets, wise, ps, sd, bl, gaia, spec = (
    safe_csv(csv_path, fallback_cols)
    for csv_path, fallback_cols in (
        ("inputs/confirmed_planets.csv", ["pl_name","ra","dec","tic_id"]),
        ("inputs/wise_photometry.csv",   ["pl_name","ra","dec","w1mpro","w2mpro","ir_flag"]),
        ("inputs/panstarrs.csv",         ["pl_name","ps_flag"]),
        ("inputs/sdss.csv",              ["pl_name","sdss_flag"]),
        ("inputs/bl_filtered.csv",       ["ra","dec","frequency_mhz"]),
        ("inputs/gaia_flags.csv",        ["pl_name","gaia_flag"]),
        ("inputs/spec_flags.csv",        ["file","spike_flag","spike_count"]),
    )
)

# 2) coerce ra/dec to float
# Only the three tables that carry positions are touched; values that cannot
# be parsed as numbers become NaN (errors="coerce").
for table in (planets, wise, bl):
    for axis in ("ra", "dec"):
        if axis in table.columns:
            table[axis] = pd.to_numeric(table[axis], errors="coerce")

print(f"✅ Loaded: planets={len(planets)}, IR={len(wise)}, PS={len(ps)}, SDSS={len(sd)}, BL={len(bl)}, GAIA={len(gaia)}, SPEC={len(spec)}")

# 3) build master and coordinate array
IR_MATCH_RADIUS    = 5 * u.arcsec   # max planet↔WISE separation for ir_flag
RADIO_MATCH_RADIUS = 1 * u.arcsec   # max planet↔BL separation for radio_flag
N_TESS_TARGETS     = 200            # how many top-scoring planets go to TESS
FLAG_COLS = ["ir_flag","ps_flag","sdss_flag","radio_flag","gaia_flag","spec_flag"]


def _positional_flag(targets, targets_ok, catalog, radius):
    """Flag targets that have a catalog source within ``radius`` on the sky.

    Parameters
    ----------
    targets : SkyCoord
        One coordinate per master row (may contain NaN positions).
    targets_ok : pandas.Series of bool
        True for the rows of ``targets`` whose ra and dec are both non-NaN;
        its index is used as the index of the result.
    catalog : pandas.DataFrame
        Table with numeric ``ra`` / ``dec`` columns, read as degrees.
    radius : astropy Quantity (angle)
        Maximum nearest-neighbour separation that counts as a match.

    Returns
    -------
    pandas.Series of bool
        Same length/index as ``targets_ok``. Rows with NaN coordinates, and
        every row when the catalog has no usable position, are False.
    """
    flag = pd.Series(False, index=targets_ok.index)
    if catalog.empty:
        return flag
    # NaN positions (produced by the errors="coerce" step) must not reach
    # match_coordinates_sky, which rejects non-finite coordinates.
    catalog_ok = catalog["ra"].notna() & catalog["dec"].notna()
    if not (targets_ok.any() and catalog_ok.any()):
        return flag
    catalog_coords = SkyCoord(catalog.loc[catalog_ok, "ra"].values * u.deg,
                              catalog.loc[catalog_ok, "dec"].values * u.deg)
    usable = targets_ok.to_numpy()
    _, separation, _ = match_coordinates_sky(targets[usable], catalog_coords)
    flag.loc[usable] = separation < radius
    return flag


def _flagged_names(table, flag_col):
    """Return the set of ``pl_name`` values with a truthy ``flag_col``.

    A name counts as flagged if at least one of its rows has a non-missing,
    truthy flag, so duplicate names in ``table`` are harmless.
    NOTE(review): assumes flags are stored as booleans or 0/1 numbers —
    confirm against the files that produce these tables.
    """
    values = table[flag_col]
    truthy = values.notna() & values.astype(bool)
    return set(table.loc[truthy, "pl_name"].dropna())


master    = planets.copy()
coords_pl = SkyCoord(master["ra"].values * u.deg,
                     master["dec"].values * u.deg)
planet_has_position = master["ra"].notna() & master["dec"].notna()

# 4) IR flag (5″)
master["ir_flag"] = _positional_flag(coords_pl, planet_has_position,
                                     wise, IR_MATCH_RADIUS)

# 5) PS1 & SDSS by name
# Membership lookup instead of a left-merge: a merge would multiply master
# rows whenever a name appears more than once in ps/sd, which then breaks
# the per-row positional assignments that rely on coords_pl's length.
master["ps_flag"]   = master["pl_name"].isin(_flagged_names(ps, "ps_flag"))
master["sdss_flag"] = master["pl_name"].isin(_flagged_names(sd, "sdss_flag"))

# 6) Radio flag (1″)
master["radio_flag"] = _positional_flag(coords_pl, planet_has_position,
                                        bl, RADIO_MATCH_RADIUS)

# 7) Gaia & Spec
master["gaia_flag"] = master["pl_name"].isin(_flagged_names(gaia, "gaia_flag"))
# The spec table is keyed by "file" and nothing in this cell links a file to
# a planet, so spec_flag stays a constant placeholder.
master["spec_flag"] = False

# 8) save
# Every flag column is already a NaN-free boolean, so no fill step is needed.
# feature_base.csv is written before base_score is added, i.e. without it.
os.makedirs("outputs", exist_ok=True)
master.to_csv("outputs/feature_base.csv", index=False)

# 9) pick top 200 for TESS
# base_score = number of flags set for the planet.
master["base_score"] = master[FLAG_COLS].astype(int).sum(axis=1)

top200 = master.nlargest(N_TESS_TARGETS, "base_score")[["pl_name","ra","dec","tic_id"]]
os.makedirs("inputs", exist_ok=True)
top200.to_csv("inputs/tess_targets.csv", index=False)

print(f"✅ Phase 1 done: {len(master)} rows to feature_base.csv; {len(top200)} → tess_targets.csv")
