In [None]:
!rm -rf technosignature-pipeline-v2
!git clone https://github.com/rbaner21/technosignature-pipeline-v2.git
!pip install -q pandas astropy pyarrow

Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 85, done.[K
remote: Counting objects: 100% (85/85), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 85 (delta 29), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (85/85), 31.73 KiB | 1.76 MiB/s, done.
Resolving deltas: 100% (29/29), done.


In [None]:
# Cell 2: Load all pipeline inputs (CSV stubs or empty if missing/blank)
import os
import pandas as pd
from pandas.errors import EmptyDataError

def safe_csv(path, columns):
    """
    Load a CSV file, falling back to an empty placeholder table.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.
    columns : list of str
        Column names given to the placeholder DataFrame. They are used only
        for the fallback; a file that is read successfully keeps its own header.

    Returns
    -------
    pandas.DataFrame
        The parsed file, or a zero-row DataFrame with `columns` when `path`
        does not exist or pandas reports the file as empty (EmptyDataError).
    """
    if os.path.exists(path):
        try:
            loaded = pd.read_csv(path)
        except EmptyDataError:
            # The file is present but has nothing to parse: use the placeholder below.
            loaded = None
        if loaded is not None:
            return loaded
    return pd.DataFrame(columns=columns)

# Each input below is loaded through safe_csv, so a missing or blank file
# produces a zero-row table with the listed columns instead of an exception.

# NEOWISE photometry table
df_neo = safe_csv(
    path="inputs/neowise.csv",
    columns=["pl_name", "w1_med", "w2_med", "ir_flag"],
)
print(f"✅ NEOWISE photometry: {len(df_neo)} rows")

# AllWISE photometry table
df_wise = safe_csv(
    path="inputs/wise_photometry.csv",
    columns=["pl_name", "w1mpro", "w2mpro", "ir_flag"],
)
print(f"✅ AllWISE photometry: {len(df_wise)} rows")

# Filtered Breakthrough Listen hits
df_bl = safe_csv(
    path="inputs/bl_hits_filtered.csv",
    columns=["frequency_mhz"],
)
print(f"✅ BL hits filtered: {len(df_bl)} rows")

# TESS flag table
df_tess = safe_csv(
    path="inputs/tess_flags.csv",
    columns=["pl_name", "tess_flag"],
)
print(f"✅ TESS flags: {len(df_tess)} rows")

# Gaia flag table
df_gaia = safe_csv(
    path="inputs/gaia_flags.csv",
    columns=["pl_name", "gaia_flag"],
)
print(f"✅ Gaia flags: {len(df_gaia)} rows")

# Spectral spike flag table (keyed by file rather than by planet name)
df_spec = safe_csv(
    path="inputs/spec_flags.csv",
    columns=["file", "spec_flag", "spike_count"],
)
print(f"✅ Spectral flags: {len(df_spec)} rows")


In [None]:
# Cell 3: Harmonize RA/Dec → ICRS and merge into NEOWISE photometry
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u

# 1) Load master planet list; the columns pl_name, ra and dec are read below.
#    This file is mandatory, so a missing file is allowed to raise.
df_pl = pd.read_csv("inputs/confirmed_planets.csv")

# 2) Build ICRS coordinates.
#    Convert to NumPy before attaching the unit: multiplying a pandas Series by
#    an astropy Unit can return a plain, unit-less Series instead of a Quantity,
#    which leaves SkyCoord without a unit.
#    ra/dec are taken as ICRS values (frame="icrs"); no frame transformation
#    happens here. Assumes both columns are in degrees — TODO confirm.
coords = SkyCoord(
    ra=df_pl["ra"].to_numpy(dtype=float) * u.deg,
    dec=df_pl["dec"].to_numpy(dtype=float) * u.deg,
    frame="icrs",
)
df_pl["ra_icrs"] = coords.ra.deg
df_pl["dec_icrs"] = coords.dec.deg
print(f"✅ Computed ICRS coords for {len(df_pl)} planets")

# 3) Reload the NEOWISE table through safe_csv so that a missing or blank file
#    is handled the same way as in the input-loading cell (empty table, no crash).
df_neo = safe_csv("inputs/neowise.csv",
                  ["pl_name", "w1_med", "w2_med", "ir_flag"])

# 4) Attach the ICRS columns by pl_name.
#    The lookup table is de-duplicated on pl_name first: with repeated names in
#    the planet list, a left merge would emit one output row per duplicate and
#    silently inflate the NEOWISE table.
planet_coords = (
    df_pl[["pl_name", "ra_icrs", "dec_icrs"]]
    .drop_duplicates(subset="pl_name")
)
df_neo = df_neo.merge(planet_coords, on="pl_name", how="left")

# 5) Persist the annotated table for the cross-match cell.
#    Rows whose pl_name is absent from the planet list keep NaN coordinates.
df_neo.to_csv("inputs/neowise_icrs.csv", index=False)
print(f"✅ NEOWISE table now has ICRS coords: {len(df_neo)} rows")


In [None]:
# Cell 4: Cross-match Gaia flags vs NEOWISE ICRS within 1"
import pandas as pd
from astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u

# Load the ICRS-annotated NEOWISE table and the Gaia flag table
# (the columns pl_name and gaia_flag are read from the latter).
nw = pd.read_csv("inputs/neowise_icrs.csv")
gaia = pd.read_csv("inputs/gaia_flags.csv")

# Only rows with finite coordinates can take part in the match:
# match_coordinates_sky rejects NaN coordinates and an empty catalogue.
# NaNs appear in `nw` for every pl_name the left merge could not find.
nw_has_coords = (nw["ra_icrs"].notna() & nw["dec_icrs"].notna()).to_numpy()
pl_has_coords = (df_pl["ra_icrs"].notna() & df_pl["dec_icrs"].notna()).to_numpy()
catalog_names = df_pl.loc[pl_has_coords, "pl_name"].to_numpy()

# Default: nothing flagged. `mask` stays aligned with the rows of `nw`
# (True = matched to a planet within 1 arcsec).
nw["gaia_flag"] = False
mask = nw_has_coords.copy()

if nw_has_coords.any() and pl_has_coords.any():
    # Build SkyCoord objects from NumPy arrays; a pandas Series multiplied by a
    # Unit can lose the unit.
    coords_nw = SkyCoord(
        ra=nw.loc[nw_has_coords, "ra_icrs"].to_numpy(dtype=float) * u.deg,
        dec=nw.loc[nw_has_coords, "dec_icrs"].to_numpy(dtype=float) * u.deg,
        frame="icrs",
    )
    coords_pl = SkyCoord(
        ra=df_pl.loc[pl_has_coords, "ra_icrs"].to_numpy(dtype=float) * u.deg,
        dec=df_pl.loc[pl_has_coords, "dec_icrs"].to_numpy(dtype=float) * u.deg,
        frame="icrs",
    )

    # Nearest planet-list entry for each NEOWISE row, kept only within 1 arcsec.
    # NOTE(review): rows of the planet list that share identical coordinates are
    # indistinguishable here, so the nearest entry may carry a different pl_name
    # than the NEOWISE row — confirm that gaia_flag is the same for such entries.
    idx, d2d, _ = match_coordinates_sky(coords_nw, coords_pl)
    within_radius = d2d < 1 * u.arcsec
    mask[nw_has_coords] = within_radius

    # A name is flagged when any of its Gaia rows is truthy. Computing this once
    # per name avoids rescanning the whole Gaia table for every matched row.
    flag_by_name = gaia.groupby("pl_name")["gaia_flag"].any()
    flagged_names = set(flag_by_name.index[flag_by_name.to_numpy(dtype=bool)])
    matched_names = pd.Series(catalog_names[idx])
    is_flagged = matched_names.isin(flagged_names).to_numpy()

    # Attach gaia_flag to the NEOWISE rows that took part in the match.
    nw.loc[nw_has_coords, "gaia_flag"] = within_radius & is_flagged
else:
    # Nothing to match against: no row counts as cross-matched.
    mask[:] = False

# Save a combined table for downstream
nw.to_csv("inputs/neowise_merged.csv", index=False)
print(f"✅ Cross‑matched {mask.sum()} entries within 1\"")
