In [None]:
# Cell 1: create the inputs/tess and inputs/harps folders
import os

# exist_ok=True keeps this cell safe to re-run; makedirs also creates the
# parent inputs/ folder when it is missing.
for subdir in ("tess", "harps"):
    os.makedirs("inputs/" + subdir, exist_ok=True)
print("✅ inputs/ folder ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 81, done.[K
remote: Counting objects: 100% (81/81), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 81 (delta 28), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (81/81), 28.50 KiB | 2.04 MiB/s, done.
Resolving deltas: 100% (28/28), done.


In [None]:
# Cell 2: download list of confirmed exoplanets
import pandas as pd

# Build the synchronous TAP request from its parts: the endpoint, the query
# (pl_name, ra, dec and tic_id from the pscomppars table) and the CSV format flag.
tap_endpoint = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
tap_query = "select+pl_name,ra,dec,tic_id+from+pscomppars"
url = f"{tap_endpoint}?query={tap_query}&format=csv"

# pandas fetches the CSV straight from the URL; the local copy is what the
# later cells read back.
pl = pd.read_csv(url)
pl.to_csv("inputs/confirmed_planets.csv", index=False)
print(f"✅ Confirmed planets: {len(pl)} rows")


In [None]:
# Cell 3: Parallel NEOWISE Single‑Exposure → inputs/neowise.csv
import os
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.irsa import Irsa
from concurrent.futures import ThreadPoolExecutor, as_completed

# Prepare: make sure inputs/ exists and drop any result left by an earlier run
os.makedirs("inputs", exist_ok=True)
out = "inputs/neowise.csv"
try:
    os.remove(out)
except FileNotFoundError:
    pass  # nothing to clean up on a first run

# Planet list written by Cell 2 (pl_name, ra, dec, tic_id)
planets = pd.read_csv("inputs/confirmed_planets.csv")

def query_neowise(row):
    """Summarise NEOWISE single-exposure photometry around one planet host.

    Runs a 5-arcsecond cone search of the ``neowiser_p1bs_psd`` catalogue at
    the row's coordinates and collapses every returned detection into median
    W1 / W2 magnitudes.

    Parameters
    ----------
    row : mapping
        Must provide ``"ra"`` and ``"dec"`` (interpreted as degrees) and
        ``"pl_name"``.

    Returns
    -------
    dict or None
        ``{"pl_name", "w1_med", "w2_med", "ir_flag"}`` when at least one
        detection is returned; ``None`` when the search is empty or the
        query/processing fails (a ``RuntimeWarning`` is emitted on failure so
        that errors are not mistaken for non-detections).
    """
    import warnings

    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
    try:
        tbl = Irsa.query_region(
            coord,
            catalog="neowiser_p1bs_psd",
            radius=5*u.arcsec,
            # Irsa.query_region expects a comma-separated string here; a
            # Python list ends up verbatim in the generated query.
            columns="w1mpro,w2mpro"
        )
        if len(tbl):
            w1s = np.array(tbl["w1mpro"], float)
            w2s = np.array(tbl["w2mpro"], float)
            # nanmedian ignores individual exposures with missing magnitudes
            w1_med = float(np.nanmedian(w1s))
            w2_med = float(np.nanmedian(w2s))
            return {
                "pl_name": row["pl_name"],
                "w1_med":  w1_med,
                "w2_med":  w2_med,
                # NOTE(review): this flags W2 - W1 > 0.5 mag, i.e. sources
                # *fainter* in W2. If ir_flag is meant to mark an infrared
                # excess the colour is probably W1 - W2 — confirm intent.
                "ir_flag": bool((w2_med - w1_med) > 0.5)
            }
    except Exception as exc:
        # Best effort: one failed host must not abort the whole batch, but the
        # failure is reported. KeyboardInterrupt/SystemExit still propagate.
        warnings.warn(
            f"NEOWISE query failed for {row['pl_name']}: {exc!r}",
            RuntimeWarning,
        )
    return None

# Fan the per-planet queries out over a thread pool and keep only the hosts
# for which query_neowise returned a result (completion order, not input order).
results = []
with ThreadPoolExecutor(max_workers=16) as exe:
    futures = {exe.submit(query_neowise, r): r for _, r in planets.iterrows()}
    for f in as_completed(futures):
        r = f.result()
        if r:
            results.append(r)

# Pin the column list so that an empty result set still produces a CSV with a
# header row; a column-less file cannot be read back with pd.read_csv.
neowise_columns = ["pl_name", "w1_med", "w2_med", "ir_flag"]
pd.DataFrame(results, columns=neowise_columns).to_csv(out, index=False)
print(f"✅ NEOWISE done: {len(results)} rows → {out}")


In [None]:
# Cell 4: Parallel AllWISE IRSA → inputs/wise_photometry.csv
import os
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.irsa import Irsa
from concurrent.futures import ThreadPoolExecutor, as_completed

# Make sure inputs/ exists and drop any result left by an earlier run
os.makedirs("inputs", exist_ok=True)
out = "inputs/wise_photometry.csv"
try:
    os.remove(out)
except FileNotFoundError:
    pass  # nothing to clean up on a first run

# Planet list written by Cell 2 (pl_name, ra, dec, tic_id)
pl = pd.read_csv("inputs/confirmed_planets.csv")

def query_wise(row):
    """Look up AllWISE W1/W2 photometry for one planet host.

    Runs a 5-arcsecond cone search of the ``allwise_p3as_psd`` catalogue at
    the row's coordinates and uses the first returned row.

    Parameters
    ----------
    row : mapping
        Must provide ``"ra"`` and ``"dec"`` (interpreted as degrees) and
        ``"pl_name"``.

    Returns
    -------
    dict or None
        ``{"pl_name", "w1mpro", "w2mpro", "ir_flag"}`` when the search
        returns at least one row; ``None`` when it is empty or the
        query/processing fails (a ``RuntimeWarning`` is emitted on failure so
        that errors are not mistaken for non-detections).
    """
    import warnings

    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
    try:
        tbl = Irsa.query_region(
            coord,
            catalog="allwise_p3as_psd",
            radius=5*u.arcsec,
            # Irsa.query_region expects a comma-separated string here; a
            # Python list ends up verbatim in the generated query.
            columns="w1mpro,w2mpro"
        )
        if len(tbl):
            # Only the first returned row is used.
            first = tbl[0]
            w1, w2 = float(first["w1mpro"]), float(first["w2mpro"])
            return {
                "pl_name": row["pl_name"],
                "w1mpro": w1,
                "w2mpro": w2,
                # NOTE(review): this flags W2 - W1 > 0.5 mag, i.e. sources
                # *fainter* in W2. If ir_flag is meant to mark an infrared
                # excess the colour is probably W1 - W2 — confirm intent.
                "ir_flag": bool((w2 - w1) > 0.5)
            }
    except Exception as exc:
        # Best effort: one failed host must not abort the whole batch, but the
        # failure is reported. KeyboardInterrupt/SystemExit still propagate.
        warnings.warn(
            f"AllWISE query failed for {row['pl_name']}: {exc!r}",
            RuntimeWarning,
        )
    return None

# Fan the per-planet queries out over a thread pool and keep only the hosts
# for which query_wise returned a result (completion order, not input order).
results = []
with ThreadPoolExecutor(max_workers=16) as exe:
    futures = {exe.submit(query_wise, r): r for _, r in pl.iterrows()}
    for f in as_completed(futures):
        r = f.result()
        if r:
            results.append(r)

# Pin the column list so that an empty result set still produces a CSV with a
# header row; a column-less file cannot be read back with pd.read_csv.
allwise_columns = ["pl_name", "w1mpro", "w2mpro", "ir_flag"]
pd.DataFrame(results, columns=allwise_columns).to_csv(out, index=False)
print(f"✅ AllWISE complete: {len(results)} rows → {out}")


In [None]:
# Cell 5: Threaded Pan‑STARRS & SDSS → inputs/panstarrs.csv & inputs/sdss.csv
import os
import pandas as pd
from astroquery.vizier import Vizier
from astropy.coordinates import SkyCoord
import astropy.units as u
from concurrent.futures import ThreadPoolExecutor, as_completed

# Make sure the output folder exists before anything is written to it.
os.makedirs("inputs", exist_ok=True)
# Planet list written by Cell 2; query_ps_sd reads ra, dec and pl_name from each row.
pl = pd.read_csv("inputs/confirmed_planets.csv")
# NOTE(review): query_ps_sd builds its own Vizier(...) objects for every call.
# Confirm that setting ROW_LIMIT on this shared object actually carries over to
# those new objects — it may have no effect on them.
Vizier.ROW_LIMIT = 1

def _vizier_has_match(catalog, column, coord):
    """Return True when `catalog` has at least one row within 5 arcsec of `coord`."""
    # row_limit is passed explicitly: only existence matters, so one row is
    # enough, and the limit must be set on the instance that runs the query.
    tables = Vizier(catalog=catalog, columns=[column], row_limit=1) \
        .query_region(coord, radius=5*u.arcsec)
    return bool(tables) and len(tables[0]) > 0


def query_ps_sd(row):
    """Check whether a planet host has a counterpart in Pan-STARRS and SDSS.

    Queries VizieR catalogues ``II/349/ps1`` and ``V/147`` with a 5-arcsecond
    cone around the row's coordinates.

    Parameters
    ----------
    row : mapping
        Must provide ``"ra"`` and ``"dec"`` (interpreted as degrees) and
        ``"pl_name"``.

    Returns
    -------
    dict
        ``{"pl_name", "ps_flag", "sdss_flag"}``. A flag is True only when the
        corresponding query returned at least one row; it stays False both
        for "no match" and for a failed query (the latter also emits a
        ``RuntimeWarning``).
    """
    import warnings

    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
    out = {"pl_name": row["pl_name"], "ps_flag": False, "sdss_flag": False}
    lookups = (
        ("ps_flag",   "II/349/ps1", "gmag"),
        ("sdss_flag", "V/147",      "u"),
    )
    for flag, catalog, column in lookups:
        # Each catalogue is tried independently so that a failure in one
        # does not discard the answer from the other.
        try:
            out[flag] = _vizier_has_match(catalog, column, coord)
        except Exception as exc:
            warnings.warn(
                f"VizieR {catalog} query failed for {row['pl_name']}: {exc!r}",
                RuntimeWarning,
            )
    return out

# Run query_ps_sd for every planet on a thread pool; every call returns a
# dict, so each planet contributes exactly one row (in completion order).
results = []
with ThreadPoolExecutor(max_workers=16) as exe:
    futures = {exe.submit(query_ps_sd, r): r for _, r in pl.iterrows()}
    results.extend(f.result() for f in as_completed(futures))

df = pd.DataFrame(results)

# One CSV per survey, each holding only the planets flagged for that survey.
ps_hits = df.loc[df["ps_flag"]]
sdss_hits = df.loc[df["sdss_flag"]]
ps_hits.to_csv("inputs/panstarrs.csv", index=False)
sdss_hits.to_csv("inputs/sdss.csv", index=False)
print(f"✅ PS1 hits: {df.ps_flag.sum()}, SDSS hits: {df.sdss_flag.sum()}")


In [None]:
# Cell 6: Robust Download & Filter of Breakthrough Listen hits (with DNS‐error fallback)
import os
import requests
import pandas as pd

# ensure inputs/ exists
os.makedirs("inputs", exist_ok=True)

# URLs & paths
url      = "https://public.breakthroughlisten.org/dataset/hits.csv"
in_path  = "inputs/bl_hits.csv"
out_path = "inputs/bl_hits_filtered.csv"

# 1) Download (with retry) or fall back to an empty stub on failure.
#    The body is streamed into a temporary ".part" file and only moved to
#    in_path once complete, so an interrupted run can never leave a truncated
#    file that the "existing file" branch below would accept as a full download.
if not os.path.exists(in_path):
    print(f"➡️ Downloading BL hits from {url}")
    part_path = in_path + ".part"
    try:
        # The context manager closes the session (and its pooled connections).
        with requests.Session() as session:
            session.mount("https://", requests.adapters.HTTPAdapter(max_retries=3))
            with session.get(url, stream=True, timeout=30) as r:
                r.raise_for_status()
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=10_000_000):
                        if chunk:
                            f.write(chunk)
        os.replace(part_path, in_path)
        print(f"✅ Download complete: {in_path}")
    except Exception as e:
        print(f"⚠️  Could not download BL hits (DNS or network error): {e}")
        # create a stub so downstream reads an empty file.
        # Because the stub lives at in_path, later runs take the
        # "existing file" branch; delete it to retry the download.
        pd.DataFrame(columns=["frequency_mhz"]).to_csv(in_path, index=False)
        print(f"ℹ️  Created empty stub: {in_path}")
    finally:
        # Runs on failure and on Ctrl-C alike: never leave a partial file behind.
        if os.path.exists(part_path):
            os.remove(part_path)
else:
    print(f"ℹ️ Found existing BL file, skipping download: {in_path}")

# 2) Filter into the 1419–1421 MHz window.
#    out_path is always created: with a header only when nothing matches
#    (including the stubbed empty input), so downstream reads never hit a
#    missing file.
print(f"➡️ Filtering hits between 1419–1421 MHz → {out_path}")
if os.path.exists(out_path):
    os.remove(out_path)

try:
    wrote_header = False
    for chunk in pd.read_csv(in_path, usecols=["frequency_mhz"], chunksize=500_000):
        mask = chunk["frequency_mhz"].between(1419, 1421)
        # The first chunk is written even when it has no matches so that the
        # header row exists; later chunks are appended only when they add rows.
        if mask.any() or not wrote_header:
            chunk.loc[mask].to_csv(
                out_path,
                mode="a",
                index=False,
                header=not wrote_header
            )
            wrote_header = True
        print(f"   Processed {len(chunk):,} rows, kept {mask.sum():,}")
    if not wrote_header:
        # The reader produced no chunks at all: still leave a readable file.
        pd.DataFrame(columns=["frequency_mhz"]).to_csv(out_path, index=False)
    print(f"✅ BL filtering complete: {out_path}")
except Exception as e:
    print(f"⚠️  BL filtering skipped (no valid input): {e}")
    # Default write mode replaces anything partially written above.
    pd.DataFrame(columns=["frequency_mhz"]).to_csv(out_path, index=False)
    print(f"ℹ️  Created empty stub: {out_path}")


In [None]:
# Cell 7: Threaded Gaia RUWE → inputs/gaia_flags.csv
import pandas as pd
from astroquery.gaia import Gaia
from concurrent.futures import ThreadPoolExecutor, as_completed

# Planet list written by Cell 2; query_gaia reads ra, dec and pl_name from each row.
pl = pd.read_csv("inputs/confirmed_planets.csv")

def query_gaia(row):
    """Flag a planet host whose nearest Gaia EDR3 source has RUWE > 1.4.

    Searches ``gaiaedr3.gaia_source`` within 0.00027778 degrees of the row's
    coordinates and inspects the ``ruwe`` of the closest source.

    Parameters
    ----------
    row : mapping
        Must provide ``"ra"``, ``"dec"`` (numeric) and ``"pl_name"``.

    Returns
    -------
    dict
        ``{"pl_name", "gaia_flag"}``. ``gaia_flag`` is True only when a source
        was found and its RUWE exceeds 1.4; it is False when nothing is
        found, RUWE is missing, or the query fails (the latter also emits a
        ``RuntimeWarning`` so failures are not mistaken for clean stars).
    """
    import warnings

    name = row["pl_name"]
    # float() guarantees only numeric text is interpolated into the query.
    ra, dec = float(row["ra"]), float(row["dec"])
    # ADQL restricts the row count with TOP (it has no trailing row-count
    # clause); ordering by angular distance makes "the one row" the closest
    # source instead of an arbitrary one inside the circle.
    q = f"""
      SELECT TOP 1 ruwe,
             DISTANCE(POINT('ICRS',ra,dec), POINT('ICRS',{ra!r},{dec!r})) AS dist
      FROM gaiaedr3.gaia_source
      WHERE CONTAINS(
        POINT('ICRS',ra,dec),
        CIRCLE('ICRS',{ra!r},{dec!r},0.00027778)
      )=1
      ORDER BY dist ASC
    """
    try:
        df = Gaia.launch_job(q).get_results().to_pandas()
        ru = df["ruwe"].iloc[0] if not df.empty else None
        # A missing RUWE (None/NaN) never raises the flag.
        flagged = bool(ru is not None and ru == ru and ru > 1.4)
        return {"pl_name": name, "gaia_flag": flagged}
    except Exception as exc:
        # Best effort: report the failure and fall back to "not flagged".
        # KeyboardInterrupt/SystemExit still propagate.
        warnings.warn(f"Gaia query failed for {name}: {exc!r}", RuntimeWarning)
        return {"pl_name": name, "gaia_flag": False}

# Run query_gaia for every planet on a thread pool; every call returns a dict,
# so each planet contributes exactly one row (in completion order).
results = []
with ThreadPoolExecutor(max_workers=16) as exe:
    futures = {exe.submit(query_gaia, r): r for _, r in pl.iterrows()}
    results.extend(f.result() for f in as_completed(futures))

gaia_flags = pd.DataFrame(results)
gaia_flags.to_csv("inputs/gaia_flags.csv", index=False)
print(f"✅ Gaia RUWE flags: {sum(r['gaia_flag'] for r in results)} true")


In [None]:
# Cell 8: Retrieve one HARPS Phase‑3 dataset and flag spikes in any CSV spectra
from astroquery.eso import Eso
import glob, os, pandas as pd

# 1) Query the ESO Phase‑3 archive for HARPS
eso = Eso()
tbl = eso.query_surveys(surveys="HARPS")  # list Phase‑3 products for HARPS

# ensure output folder exists
os.makedirs("inputs/harps", exist_ok=True)

# One record per CSV spectrum; stays empty when nothing usable is found, in
# which case a header-only spec_flags.csv is written below.
flags = []

# 2) If nothing found, only the empty stub is written
if tbl is None or len(tbl) == 0:
    print("ℹ️  No HARPS Phase‑3 products found; creating empty spec_flags.csv")

else:
    # 3) Retrieve the first product ID
    # NOTE(review): assumes the result table exposes a "Product ID" column —
    # confirm against the installed astroquery version.
    first_id = tbl["Product ID"][0]
    files = eso.retrieve_data(first_id, destination="inputs/harps", unzip=True)  # download files

    # 4) Loop through any CSVs in inputs/harps/ (sorted for a reproducible
    #    row order) and count samples above 5x the median flux
    for fpath in sorted(glob.glob("inputs/harps/*.csv")):
        df = pd.read_csv(fpath)
        if "flux" not in df.columns:
            # An unrelated CSV must not abort the whole cell.
            print(f"⚠️  Skipping {os.path.basename(fpath)}: no 'flux' column")
            continue
        spike_count = (df["flux"] > 5 * df["flux"].median()).sum()
        flags.append({
            "file":        os.path.basename(fpath),
            "spike_flag":  bool(spike_count > 0),
            "spike_count": int(spike_count)
        })

    if not flags:
        print("⚠️  Retrieved HARPS files but found no CSV spectra; writing empty spec_flags.csv")

# 5) Single write path: the explicit column list yields a header-only file
#    when flags is empty and the real flags otherwise.
pd.DataFrame(flags, columns=["file","spike_flag","spike_count"])\
  .to_csv("inputs/spec_flags.csv", index=False)
if flags:
    print(f"✅ HARPS flags for {len(flags)} spectra")
