In [None]:
!rm -rf technosignature-pipeline-v2
!git clone https://github.com/rbaner21/technosignature-pipeline-v2.git
!pip install -q pandas astropy pyarrow

Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 85, done.[K
remote: Counting objects: 100% (85/85), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 85 (delta 29), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (85/85), 31.73 KiB | 1.76 MiB/s, done.
Resolving deltas: 100% (29/29), done.


In [None]:
# Cell 2: Load all pipeline inputs (CSV stubs if missing)
import os
import pandas as pd


def _load_or_stub(label, path, columns, missing_msg=None):
    """Read ``path`` as CSV, or build an empty frame when the file is absent.

    Args:
        label: Human-readable dataset name used in the status line.
        path: CSV location probed with ``os.path.exists``.
        columns: Column names given to the empty placeholder frame.
        missing_msg: Optional text printed *instead of* the row-count line
            when the file is absent. When ``None`` the row-count line is
            printed in both cases.

    Returns:
        The ``pandas.DataFrame`` read from ``path``, or an empty frame with
        ``columns`` when ``path`` does not exist.
    """
    found = os.path.exists(path)
    frame = pd.read_csv(path) if found else pd.DataFrame(columns=columns)
    if found or missing_msg is None:
        print(f"✅ {label}: {len(frame)} rows")
    else:
        print(missing_msg)
    return frame


# — NEOWISE photometry
# Note: the placeholder has no "ra"/"dec" columns, which the coordinate cell
# indexes; that cell cannot run on the stub.
nei_path = "inputs/neowise.csv"
df_neo = _load_or_stub(
    "NEOWISE photometry", nei_path, ["pl_name", "w1_med", "w2_med", "ir_flag"]
)

# — AllWISE photometry (optional)
wise_path = "inputs/wise_photometry.csv"
df_wise = _load_or_stub(
    "AllWISE photometry", wise_path, ["pl_name", "w1mpro", "w2mpro", "ir_flag"]
)

# — Breakthrough Listen hits
bl_path = "inputs/bl_hits_filtered.csv"
df_bl = _load_or_stub("BL hits filtered", bl_path, ["frequency_mhz"])

# — TESS flags (if you have them)
# This is the only input that reports a distinct message when the file is absent.
tess_path = "inputs/tess_flags.csv"
df_tess = _load_or_stub(
    "TESS flags",
    tess_path,
    ["pl_name", "tess_flag"],
    missing_msg="ℹ️  No TESS flags found, using empty table",
)

# — Gaia RUWE flags
gaia_path = "inputs/gaia_flags.csv"
df_gaia = _load_or_stub("Gaia flags", gaia_path, ["pl_name", "gaia_flag"])

# — Spectral spike flags
spec_path = "inputs/spec_flags.csv"
df_spec = _load_or_stub(
    "Spectral flags", spec_path, ["file", "spec_flag", "spike_count"]
)


In [None]:
from astropy.coordinates import SkyCoord
import astropy.units as u

# Attach ICRS right ascension / declination (in degrees) to the NEOWISE table
# and persist the result as parquet.

# Fail early with an actionable message: when the NEOWISE CSV was missing, the
# placeholder frame built by the loading cell has no "ra"/"dec" columns.
missing_cols = [col for col in ("ra", "dec") if col not in df_neo.columns]
if missing_cols:
    raise KeyError(
        f"df_neo is missing column(s) {missing_cols}; "
        "cannot build sky coordinates (was inputs/neowise.csv found?)"
    )

# Hand SkyCoord plain float arrays plus an explicit unit rather than
# multiplying a pandas Series by an astropy unit, so the result does not
# depend on how pandas and astropy negotiate the `*` operator.
coords = SkyCoord(
    ra=df_neo["ra"].to_numpy(dtype=float),
    dec=df_neo["dec"].to_numpy(dtype=float),
    unit=u.deg,
    frame="icrs",
)

# df_neo is extended in place with the two new columns.
df_neo["ra_icrs"] = coords.ra.deg
df_neo["dec_icrs"] = coords.dec.deg
df_neo.to_parquet("inputs/neowise_filtered_icrs.parquet", index=False)
print("✅ NEOWISE coords harmonized")


In [None]:
# Checkpoint cell: performs no work, only prints a status line.
# Inputs expected to be in place at this point:
# inputs/neowise_filtered_icrs.parquet   (written by the coordinate cell)
# inputs/bl_filtered.csv
#   NOTE(review): the loading cell reads inputs/bl_hits_filtered.csv —
#   confirm which file name is correct.
# inputs/tess_flags.csv
# inputs/gaia_flags.csv
# inputs/spec_flags.csv
# inputs/confirmed_planets.csv
#   NOTE(review): not read by any cell visible in this notebook section —
#   confirm it exists and where it is produced.
print("✅ Preprocessing complete—flags ready for feature engineering")
