In [None]:
!rm -rf technosignature-pipeline-v2
!git clone https://github.com/rbaner21/technosignature-pipeline-v2.git
!pip install -q pandas astropy pyarrow

Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 85, done.
remote: Counting objects: 100% (85/85), done.
remote: Compressing objects: 100% (80/80), done.
remote: Total 85 (delta 29), reused 23 (delta 1), pack-reused 0 (from 0)
Receiving objects: 100% (85/85), 31.73 KiB | 1.76 MiB/s, done.
Resolving deltas: 100% (29/29), done.


In [None]:
# Cell 2: Load all pipeline inputs (CSV stubs or empty if missing/blank)
import os
import pandas as pd
from pandas.errors import EmptyDataError

def safe_csv(path, columns):
    """
    Load the CSV at `path`, tolerating absent or blank files.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.
    columns : list of str
        Column names used for the placeholder frame.

    Returns
    -------
    pandas.DataFrame
        The parsed file when it exists and contains data; otherwise an
        empty DataFrame whose columns are `columns`. A file that parses
        successfully is returned as-is (its own header wins over `columns`).
    """
    if os.path.exists(path):
        try:
            return pd.read_csv(path)
        except EmptyDataError:
            # File is present but has nothing to parse: use the placeholder below.
            pass
    return pd.DataFrame(columns=columns)

def _load_input(label, path, columns):
    """Read one pipeline input via `safe_csv`, print its row count, and return it."""
    frame = safe_csv(path, columns)
    print(f"✅ {label}: {len(frame)} rows")
    return frame


# Each input is loaded the same way; only the label, path, and fallback
# column list differ. Order matches the original report order.
df_neo = _load_input(
    "NEOWISE photometry",
    "inputs/neowise.csv",
    ["pl_name", "w1_med", "w2_med", "ir_flag"],
)
df_wise = _load_input(
    "AllWISE photometry",
    "inputs/wise_photometry.csv",
    ["pl_name", "w1mpro", "w2mpro", "ir_flag"],
)
df_bl = _load_input(
    "BL hits filtered",
    "inputs/bl_hits_filtered.csv",
    ["frequency_mhz"],
)
df_tess = _load_input(
    "TESS flags",
    "inputs/tess_flags.csv",
    ["pl_name", "tess_flag"],
)
df_gaia = _load_input(
    "Gaia flags",
    "inputs/gaia_flags.csv",
    ["pl_name", "gaia_flag"],
)
df_spec = _load_input(
    "Spectral flags",
    "inputs/spec_flags.csv",
    ["file", "spec_flag", "spike_count"],
)


In [None]:
# Cell 3: Harmonize RA/Dec → ICRS coords (with numeric coercion)
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u

# Steps 1-2: read the master planet table, coerce ra/dec to numbers
# (unparseable entries become NaN), and discard rows lacking either value.
df_pl = (
    pd.read_csv("inputs/confirmed_planets.csv")
    .assign(
        ra=lambda tbl: pd.to_numeric(tbl["ra"], errors="coerce"),
        dec=lambda tbl: pd.to_numeric(tbl["dec"], errors="coerce"),
    )
    .dropna(subset=["ra", "dec"])
    .reset_index(drop=True)
)

# Step 3: wrap the cleaned positions (in degrees) in an ICRS SkyCoord
sky_positions = SkyCoord(
    ra=df_pl["ra"].values * u.deg,
    dec=df_pl["dec"].values * u.deg,
    frame="icrs",
)
df_pl["ra_icrs"] = sky_positions.ra.deg
df_pl["dec_icrs"] = sky_positions.dec.deg

# Step 4: persist the annotated table so later stages can read it back
df_pl.to_csv("inputs/confirmed_planets_icrs.csv", index=False)
print(f"✅ ICRS coords computed for {len(df_pl)} planets → inputs/confirmed_planets_icrs.csv")


In [None]:
# Cell 4: Merge NEOWISE IR flags with Gaia RUWE flags by planet name
import pandas as pd

# Load the two flag tables through safe_csv (defined in the input-loading cell)
# rather than bare pd.read_csv: a missing or blank stub then yields an empty
# frame with the expected columns instead of FileNotFoundError / EmptyDataError.
# The fallback schemas are the same ones used when the inputs are first loaded.
df_neo = safe_csv("inputs/neowise.csv",
                  ["pl_name", "w1_med", "w2_med", "ir_flag"])   # IR flags
df_gaia = safe_csv("inputs/gaia_flags.csv",
                   ["pl_name", "gaia_flag"])                    # Gaia RUWE flags

# Merge them into a single DataFrame (left join keeps all IR entries)
df_merge = df_neo.merge(
    df_gaia,
    on="pl_name",
    how="left"
)

# Wherever gaia_flag is NaN (no Gaia match for that planet), assume False
df_merge["gaia_flag"] = df_merge["gaia_flag"].fillna(False)

# Write out for downstream feature engineering; create the folder first so
# the write succeeds even when no input stubs were present.
os.makedirs("inputs", exist_ok=True)
df_merge.to_csv("inputs/ir_gaia_flags.csv", index=False)

print(f"✅ Merged IR+Gaia flags: {len(df_merge)} rows")


In [None]:
# Cell 5: Detrend & normalize light curves for every TIC ID in the IR+Gaia flag table
import os

import lightkurve as lk
import pandas as pd

# Load your merged flag table
flags = pd.read_csv("inputs/ir_gaia_flags.csv")

# The loop below is keyed on TIC IDs. Fail early with an actionable message
# if the flag table has not been cross-matched to TIC yet.
if "tic_id" not in flags.columns:
    raise KeyError(
        "inputs/ir_gaia_flags.csv has no 'tic_id' column; "
        "cross-match planets to TIC IDs before running this cell"
    )

# The output folder must exist before any FITS file is written into it.
out_dir = "inputs/tess"
os.makedirs(out_dir, exist_ok=True)

# For each target, download its TESS light curve, flatten it, and save it.
lc_flat_list = []
for tic in flags["tic_id"].dropna().unique():
    tic_int = int(tic)
    lc = lk.search_lightcurve(f"TIC {tic_int}", mission="TESS").download()
    if lc is None:
        # download() returns None when the search found nothing; skip the
        # target instead of crashing on `None.flatten`.
        print(f"⚠️ No TESS light curve found for TIC {tic_int}; skipping")
        continue
    lc_flat = lc.flatten(window_length=401)
    lc_flat["tic_id"] = tic
    # NOTE(review): LightCurveCollection does not appear to provide to_fits, so
    # each flattened light curve is written to its own file. Confirm whether
    # any downstream step expected a single lc_flat_collection.fits.
    lc_flat.to_fits(os.path.join(out_dir, f"tic_{tic_int}_flat.fits"), overwrite=True)
    lc_flat_list.append(lc_flat)

# Keep the in-memory collection available for later cells
all_flat = lk.LightCurveCollection(lc_flat_list)
print(f"✅ Flattened {len(lc_flat_list)} TESS light curves")
