In [None]:
import os
# Create the directory that later cells write results into; exist_ok makes
# this a no-op when the cell is re-run and the directory is already there.
os.makedirs(name="outputs", exist_ok=True)
print("✅ Feature engineering ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 89, done.
remote: Counting objects: 100% (89/89), done.
remote: Compressing objects: 100% (84/84), done.
remote: Total 89 (delta 31), reused 23 (delta 1), pack-reused 0 (from 0)
Receiving objects: 100% (89/89), 32.86 KiB | 3.65 MiB/s, done.
Resolving deltas: 100% (31/31), done.


In [None]:
# Cell 2: Phase 1 feature build (no TESS)
import os
import pandas as pd
from astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u

# Phase 1 feature build: load the planet list and the per-survey catalog
# tables, derive one boolean flag per survey for every planet, write the flag
# matrix to outputs/feature_base.csv, then rank planets by the number of raised
# flags and write the top-scoring ones to inputs/tess_targets.csv.

# — 0) Tunable parameters for this phase
IR_MATCH_RADIUS = 5 * u.arcsec       # AllWISE cross-match radius
RADIO_MATCH_RADIUS = 1 * u.arcsec    # BL hit cross-match radius
N_TESS_TARGETS = 200                 # how many top-scoring planets go on to TESS
FLAG_COLUMNS = ["ir_flag", "ps_flag", "sdss_flag", "radio_flag", "gaia_flag", "spec_flag"]


def _as_bool(values):
    """Return `values` as a plain bool Series, counting missing entries as False.

    Done element-wise instead of via `fillna(False)` so that object/float
    columns (flags mixed with NaN) end up with a real bool dtype.
    """
    return values.map(lambda v: pd.notna(v) and bool(v)).astype(bool)


def _flag_by_planet(table, flag_col):
    """Collapse `table` to one boolean per `pl_name`.

    A planet is flagged when any of its rows has a truthy `flag_col`. The
    result is a Series indexed by unique planet name, so it can be used with
    `Series.map` without multiplying rows or hitting a duplicate-index error.
    """
    return _as_bool(table[flag_col]).groupby(table["pl_name"]).any()


# — 1) Load master planet list
planets = pd.read_csv("inputs/confirmed_planets.csv")

# — 2) Load all the flag/catalog tables
wise = pd.read_csv("inputs/wise_photometry.csv")       # AllWISE IR
ps   = pd.read_csv("inputs/panstarrs.csv")             # Pan‑STARRS hits
sd   = pd.read_csv("inputs/sdss.csv")                  # SDSS hits
bl   = pd.read_csv("inputs/bl_filtered.csv")           # BL radio hits (filtered)
gaia = pd.read_csv("inputs/gaia_flags.csv")            # Gaia RUWE flags
spec = pd.read_csv("inputs/spec_flags.csv")            # Spectral spike flags (loaded but not used below)

# — 3) Start the feature matrix with the planet table
master = planets.copy()
coords_planets = SkyCoord(master["ra"]*u.deg, master["dec"]*u.deg)

# — 4) IR flag: nearest AllWISE source within IR_MATCH_RADIUS
coords_wise = SkyCoord(wise["ra"]*u.deg, wise["dec"]*u.deg)
if len(wise):
    _, d2d_ir, _ = match_coordinates_sky(coords_planets, coords_wise)
    master["ir_flag"] = d2d_ir < IR_MATCH_RADIUS
else:
    # match_coordinates_sky rejects a length-0 catalog; nothing can match.
    master["ir_flag"] = False

# — 5) PS1 & SDSS: look up by planet name.
# Mapping a one-row-per-planet lookup (instead of a left merge) keeps `master`
# at exactly one row per input planet even when a catalog lists a planet more
# than once, so it stays aligned with `coords_planets` for the match in step 6.
master["ps_flag"] = _as_bool(master["pl_name"].map(_flag_by_planet(ps, "ps_flag")))
master["sdss_flag"] = _as_bool(master["pl_name"].map(_flag_by_planet(sd, "sdss_flag")))

# — 6) Radio flag: nearest BL hit within RADIO_MATCH_RADIUS
coords_bl = SkyCoord(bl["ra"]*u.deg, bl["dec"]*u.deg)
if len(bl):
    _, d2d_bl, _ = match_coordinates_sky(coords_planets, coords_bl)
    master["radio_flag"] = d2d_bl < RADIO_MATCH_RADIUS
else:
    # The filtered hit list may be empty; treat that as "no radio match".
    master["radio_flag"] = False

# — 7) Gaia & Spec flags: map by name (missing → False)
master["gaia_flag"] = _as_bool(master["pl_name"].map(_flag_by_planet(gaia, "gaia_flag")))
master["spec_flag"] = False   # unless you have a mapping from spec→pl_name

# — 8) Make sure every flag column is a real boolean with no NaNs
for col in FLAG_COLUMNS:
    master[col] = _as_bool(master[col])

# Invariant: the name-based lookups must not add or drop planets.
assert len(master) == len(planets), "feature matrix must keep one row per input planet"

# — 9) Save the base feature matrix (written before base_score is added,
#      so the file holds the planet columns plus the flags only)
os.makedirs("outputs", exist_ok=True)
master.to_csv("outputs/feature_base.csv", index=False)

# — 10) Compute a “base_score” (number of raised flags) & pick the top targets for TESS
master["base_score"] = master[FLAG_COLUMNS].astype(int).sum(axis=1)

top200 = master.nlargest(N_TESS_TARGETS, "base_score")[["pl_name","ra","dec","tic_id"]]
top200.to_csv("inputs/tess_targets.csv", index=False)

print(f"✅ Phase 1 complete: feature_base.csv ({len(master)} rows), tess_targets.csv ({len(top200)} rows)")
