In [None]:
import os
# Create the results directory up front; exist_ok makes this a no-op when the
# directory is already there, so the cell can be re-run safely.
os.makedirs(name="outputs", exist_ok=True)

# Status line for the notebook log.
print("✅ Feature engineering ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 89 (delta 31), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (89/89), 32.86 KiB | 3.65 MiB/s, done.
Resolving deltas: 100% (31/31), done.


In [None]:
# Cell 2: Phase 1 feature build (no TESS)
import pandas as pd
from astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u
import os

# Phase 1: build one boolean flag per survey for every confirmed planet,
# save the flag matrix, then rank planets by how many flags they carry and
# write the 200 highest-scoring ones out as the TESS target list.

# Load everything except tess_flags
plan = pd.read_csv("inputs/confirmed_planets.csv")
wise = pd.read_csv("inputs/wise_photometry_icrs.csv")
ps   = pd.read_csv("inputs/panstarrs.csv")
sd   = pd.read_csv("inputs/sdss.csv")
bl   = pd.read_csv("inputs/bl_filtered_icrs.csv")
gaia = pd.read_csv("inputs/gaia_flags.csv")
spec = pd.read_csv("inputs/spec_flags.csv")  # loaded but not used yet (see spec_flag below)

# Single source of truth for the flag columns used for NaN filling and scoring.
flag_cols = ["ir_flag", "ps_flag", "sdss_flag", "radio_flag", "gaia_flag", "spec_flag"]

master = plan.copy()
n_planets = len(master)

# Planet sky positions; one entry per row of `master`, in row order.
coords_pl = SkyCoord(master["ra"]*u.deg, master["dec"]*u.deg)

# IR flag (5″ cone): True when the nearest WISE source lies within 5 arcsec.
coords_w = SkyCoord(wise["ra_icrs"]*u.deg, wise["dec_icrs"]*u.deg)
_, sep_wise, _ = match_coordinates_sky(coords_pl, coords_w)
master["ir_flag"] = sep_wise < 5*u.arcsec

# PS1 & SDSS flags by name.
# The right-hand tables are de-duplicated on pl_name first (first row wins) so
# the left merge cannot multiply planet rows; otherwise the positional
# separation array used for radio_flag below would no longer line up with
# `master`.
master = master.merge(
    ps[["pl_name", "ps_flag"]].drop_duplicates("pl_name"),
    on="pl_name", how="left",
)
master = master.merge(
    sd[["pl_name", "sdss_flag"]].drop_duplicates("pl_name"),
    on="pl_name", how="left",
)
assert len(master) == n_planets, "merge changed the number of planet rows"

# Radio flag (1″ cone): True when the nearest BL source lies within 1 arcsec.
coords_b = SkyCoord(bl["ra_icrs"]*u.deg, bl["dec_icrs"]*u.deg)
_, sep_bl, _ = match_coordinates_sky(coords_pl, coords_b)
master["radio_flag"] = sep_bl < 1*u.arcsec

# Gaia flag, looked up by planet name.
# Assigning the pl_name-indexed Series directly would align it against
# master's integer row index, match nothing, and leave every row NaN (then
# False after the fill). Mapping through the pl_name column does the intended
# name-based lookup. Duplicate names in the Gaia table keep their first row.
gaia_lookup = gaia.drop_duplicates("pl_name").set_index("pl_name")["gaia_flag"]
master["gaia_flag"] = master["pl_name"].map(gaia_lookup)
master["spec_flag"] = False   # unless you map spec files→planets

# Planets missing from a name-matched catalogue come back as NaN; treat as False.
for col in flag_cols:
    master[col] = master[col].fillna(False)

# 1. Save the base feature matrix (written before base_score is added)
os.makedirs("outputs", exist_ok=True)
master.to_csv("outputs/feature_base.csv", index=False)

# 2. Compute base_score (number of flags set per planet) and pick top 200 for TESS
master["base_score"] = master[flag_cols].astype(int).sum(axis=1)

top200 = master.nlargest(200, "base_score")[
    ["pl_name", "ra", "dec", "tic_id"]
]
top200.to_csv("inputs/tess_targets.csv", index=False)

print(f"✅ Phase 1 done: feature_base.csv ({len(master)} rows), tess_targets.csv ({len(top200)} rows)")
