In [None]:
!rm -rf technosignature-pipeline-v2
!git clone https://github.com/rbaner21/technosignature-pipeline-v2.git
!pip install -q pandas astropy pyarrow

Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 85, done.[K
remote: Counting objects: 100% (85/85), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 85 (delta 29), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (85/85), 31.73 KiB | 1.76 MiB/s, done.
Resolving deltas: 100% (29/29), done.


In [None]:
# Cell 2: Load all pipeline inputs (CSV stubs or empty if missing/blank)
import os
import pandas as pd
from pandas.errors import EmptyDataError

def safe_csv(path, columns):
    """
    Load the CSV at `path` as a DataFrame, tolerating absent or blank files.

    Returns the parsed contents when the read succeeds. When `path` does not
    exist, or pandas raises EmptyDataError while parsing it, an empty
    DataFrame whose columns are `columns` is returned instead.
    """
    if os.path.exists(path):
        try:
            return pd.read_csv(path)
        except EmptyDataError:
            # Blank file: fall through to the placeholder frame below.
            pass
    return pd.DataFrame(columns=columns)

# Each table below is read with safe_csv(); when the file is missing or blank
# the result is an empty frame carrying the listed placeholder columns.

# NEOWISE photometry
df_neo = safe_csv(
    path="inputs/neowise.csv",
    columns=["pl_name", "w1_med", "w2_med", "ir_flag"],
)
print(f"✅ NEOWISE photometry: {len(df_neo)} rows")

# AllWISE photometry
df_wise = safe_csv(
    path="inputs/wise_photometry.csv",
    columns=["pl_name", "w1mpro", "w2mpro", "ir_flag"],
)
print(f"✅ AllWISE photometry: {len(df_wise)} rows")

# Breakthrough Listen hits
df_bl = safe_csv(
    path="inputs/bl_hits_filtered.csv",
    columns=["frequency_mhz"],
)
print(f"✅ BL hits filtered: {len(df_bl)} rows")

# TESS flags
df_tess = safe_csv(
    path="inputs/tess_flags.csv",
    columns=["pl_name", "tess_flag"],
)
print(f"✅ TESS flags: {len(df_tess)} rows")

# Gaia RUWE flags
df_gaia = safe_csv(
    path="inputs/gaia_flags.csv",
    columns=["pl_name", "gaia_flag"],
)
print(f"✅ Gaia flags: {len(df_gaia)} rows")

# Spectral spike flags
df_spec = safe_csv(
    path="inputs/spec_flags.csv",
    columns=["file", "spec_flag", "spike_count"],
)
print(f"✅ Spectral flags: {len(df_spec)} rows")


In [None]:
from astropy.coordinates import SkyCoord
import astropy.units as u

# Harmonize NEOWISE positions to ICRS and persist the table as Parquet.
# safe_csv() falls back to a frame without "ra"/"dec" columns when
# inputs/neowise.csv is missing or blank, so check for them up front instead
# of letting the column lookup raise KeyError.
neo_has_coords = {"ra", "dec"}.issubset(df_neo.columns)

if neo_has_coords:
    # Hand astropy plain float arrays so the result does not depend on how
    # pandas Series arithmetic treats astropy unit objects.
    coords = SkyCoord(
        df_neo["ra"].to_numpy(dtype=float) * u.deg,
        df_neo["dec"].to_numpy(dtype=float) * u.deg,
        frame="icrs",
    )
    df_neo["ra_icrs"], df_neo["dec_icrs"] = coords.ra.deg, coords.dec.deg
else:
    # No positions available: keep the output schema stable with NaN columns.
    df_neo["ra_icrs"] = pd.Series(float("nan"), index=df_neo.index, dtype="float64")
    df_neo["dec_icrs"] = pd.Series(float("nan"), index=df_neo.index, dtype="float64")

# Make sure the output directory exists before writing.
os.makedirs("inputs", exist_ok=True)
df_neo.to_parquet("inputs/neowise_filtered_icrs.parquet", index=False)

if neo_has_coords:
    print("✅ NEOWISE coords harmonized")
else:
    print("⚠️ NEOWISE table has no ra/dec columns; wrote placeholder ICRS columns")


In [None]:
# Files the feature-engineering stage is expected to consume:
#   inputs/neowise_filtered_icrs.parquet  (written by the coordinate cell above)
#   inputs/bl_hits_filtered.csv           (NOTE(review): originally listed here as
#                                          inputs/bl_filtered.csv, but the loading cell
#                                          reads bl_hits_filtered.csv - confirm the name)
#   inputs/tess_flags.csv
#   inputs/gaia_flags.csv
#   inputs/spec_flags.csv
#   inputs/confirmed_planets.csv          (NOTE(review): not loaded by any cell shown
#                                          in this notebook - confirm it exists)
# This cell performs no checks of its own; it only prints a status line.
print("✅ Preprocessing complete—flags ready for feature engineering")
