In [None]:
# Cell 1: ensure inputs/ hierarchy exists
import os
# Create each instrument sub-folder; missing parents are created along the
# way and folders that already exist are left untouched.
for subdir in ("inputs/tess", "inputs/harps"):
    os.makedirs(subdir, exist_ok=True)
print("✅ inputs/ folder ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 81, done.
remote: Counting objects: 100% (81/81), done.
remote: Compressing objects: 100% (76/76), done.
remote: Total 81 (delta 28), reused 23 (delta 1), pack-reused 0 (from 0)
Receiving objects: 100% (81/81), 28.50 KiB | 2.04 MiB/s, done.
Resolving deltas: 100% (28/28), done.


In [None]:
import pandas as pd

# NASA Exoplanet Archive TAP (sync) request: planet name, sky position and
# TIC id for every row of the pscomppars table, returned as CSV.
url = "".join([
    "https://exoplanetarchive.ipac.caltech.edu/TAP/sync",
    "?query=select+pl_name,ra,dec,tic_id+from+pscomppars",
    "&format=csv",
])

# Fetch once, then persist locally so the later cells can reload the
# target list from disk.
pl = pd.read_csv(url)
pl.to_csv("inputs/confirmed_planets.csv", index=False)
n_planets = len(pl)
print(f"✅ Confirmed planets: {n_planets} rows")


In [None]:
# Cell 3: Parallel AllWISE IRSA → inputs/wise_photometry.csv
import os, pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.irsa import Irsa
from multiprocessing import Pool

# Output location; a file left over from a previous run is deleted up front
out = "inputs/wise_photometry.csv"
os.makedirs("inputs", exist_ok=True)
if os.path.exists(out):
    os.remove(out)

# Target list written by the confirmed-planets cell
pl = pd.read_csv("inputs/confirmed_planets.csv")

def query_wise(row):
    """Look up one target in the AllWISE source catalogue.

    Parameters
    ----------
    row : mapping
        Must provide "pl_name", "ra" and "dec"; the coordinates are
        interpreted as ICRS degrees.

    Returns
    -------
    dict or None
        A record with keys "pl_name", "ra", "dec", "w1mpro", "w2mpro" and
        "ir_flag" built from the first row returned within 5 arcsec, or
        None when nothing is returned or the lookup fails.
    """
    try:
        coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
        tbl = Irsa.query_region(
            coord,
            catalog="allwise_p3as_psd",
            radius=5*u.arcsec,
            # astroquery documents `columns` as a comma-separated string; a
            # Python list would be interpolated verbatim into the ADQL SELECT.
            columns="ra,dec,w1mpro,w2mpro",
        )
        if tbl is None or len(tbl) == 0:
            return None

        # First returned row. NOTE(review): the service's row order is not
        # checked here, so this is not guaranteed to be the nearest source.
        hit = tbl[0]
        w1, w2 = hit["w1mpro"], hit["w2mpro"]
        return {
            "pl_name": row["pl_name"],
            "ra": float(hit["ra"]),
            "dec": float(hit["dec"]),
            "w1mpro": float(w1),
            "w2mpro": float(w2),
            # NOTE(review): this fires when W2 is more than 0.5 mag fainter
            # than W1; an infrared excess is usually written W1 - W2 > cut.
            # Confirm the intended sign before relying on this flag.
            "ir_flag": bool((w2 - w1) > 0.5),
        }
    except Exception as e:
        # Best-effort lookup: report the failure instead of hiding it, then
        # let the caller drop this target.
        print(f"AllWISE failed for {row['pl_name']}: {e}")
        return None

# Run 4 parallel workers.
# NOTE(review): query_wise is defined inside the notebook. The Python
# multiprocessing docs state that Pool needs the target function to be
# importable by the child processes, so this relies on the "fork" start
# method — confirm the platform this notebook is meant to run on.
with Pool(4) as p:
    results = p.map(query_wise, [r for _, r in pl.iterrows()])

# Drop the misses (None) and save. The column list is fixed so that a header
# row is still written when no target matched; a header-less CSV cannot be
# read back with pd.read_csv.
wise_columns = ["pl_name", "ra", "dec", "w1mpro", "w2mpro", "ir_flag"]
df_wise = pd.DataFrame(
    [r for r in results if r is not None],
    columns=wise_columns,
)
df_wise.to_csv(out, index=False)
print(f"✅ AllWISE complete: {len(df_wise)} rows written to {out}")


In [None]:
# Cell 4: Parallel Pan‑STARRS & SDSS → inputs/panstarrs.csv & inputs/sdss.csv
import os, pandas as pd
from astroquery.vizier import Vizier
from astropy.coordinates import SkyCoord
import astropy.units as u
from multiprocessing import Pool

# Target list written by the confirmed-planets cell
pl = pd.read_csv("inputs/confirmed_planets.csv")

# A single match per target is enough, so cap the class-level row limit.
# NOTE(review): confirm this class attribute is honoured by instances that
# are created later with Vizier(...).
Vizier.ROW_LIMIT = 1

def _vizier_has_match(coord, catalog, column, label):
    """Return True when `catalog` yields at least one row within 5 arcsec of `coord`."""
    try:
        # row_limit is passed to the constructor so the one-row cap applies
        # to this instance; only the existence of a match is needed.
        tables = Vizier(columns=[column], catalog=catalog, row_limit=1) \
                 .query_region(coord, radius=5*u.arcsec)
        return bool(tables) and len(tables[0]) > 0
    except Exception as e:
        # Best-effort: a failed lookup is reported and counted as "no match".
        print(f"VizieR {catalog} failed for {label}: {e}")
        return False


def query_ps_sd(row):
    """Check one target for a Pan-STARRS and an SDSS counterpart.

    Parameters
    ----------
    row : mapping
        Must provide "pl_name", "ra" and "dec"; the coordinates are
        interpreted as ICRS degrees.

    Returns
    -------
    dict
        {"pl_name": ..., "ps_flag": bool, "sdss_flag": bool}. A flag is True
        when the corresponding VizieR catalogue ("II/349/ps1" or "V/147")
        returns at least one row within 5 arcsec, and False when it returns
        none or the query fails.
    """
    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
    name = row["pl_name"]
    return {
        "pl_name": name,
        "ps_flag": _vizier_has_match(coord, "II/349/ps1", "gmag", name),
        "sdss_flag": _vizier_has_match(coord, "V/147", "u", name),
    }

# One task per target row, spread over four worker processes
target_rows = [series for _idx, series in pl.iterrows()]
with Pool(processes=4) as workers:
    hits = workers.map(query_ps_sd, target_rows)

df_hits = pd.DataFrame(hits)
os.makedirs("inputs", exist_ok=True)

# One CSV per survey, each holding only the targets that matched it
matched_ps = df_hits.ps_flag
matched_sdss = df_hits.sdss_flag
df_hits[matched_ps].to_csv("inputs/panstarrs.csv", index=False)
df_hits[matched_sdss].to_csv("inputs/sdss.csv", index=False)
print(f"✅ Pan-STARRS hits: {matched_ps.sum()}, SDSS hits: {matched_sdss.sum()}")


In [None]:
# Cell 5: Download & filter Breakthrough Listen hits CSV robustly
import os
import subprocess
import pandas as pd

# 1) Ensure inputs folder exists
os.makedirs("inputs", exist_ok=True)

# 2) Download the full hits CSV if we don't already have it
url = "https://public.breakthroughlisten.org/dataset/hits.csv"
in_path  = "inputs/bl_hits.csv"
out_path = "inputs/bl_filtered.csv"

if not os.path.exists(in_path):
    # Download to a scratch name and promote it only on success. Otherwise a
    # failed or partial transfer would sit at in_path and be mistaken for a
    # complete cached copy on the next run.
    tmp_path = in_path + ".part"
    try:
        try:
            print("➡️  Downloading BL hits via wget…")
            subprocess.run(
                ["wget", "-O", tmp_path, url],
                check=True
            )
        except Exception as e_wget:
            print(f"wget failed: {e_wget}\n➡️  Trying curl…")
            try:
                # --fail makes curl exit non-zero on an HTTP error status
                # instead of saving the server's error page as the CSV.
                subprocess.run(
                    ["curl", "--fail", "-L", "-o", tmp_path, url],
                    check=True
                )
            except Exception as e_curl:
                raise RuntimeError(
                    f"Both wget and curl failed:\n {e_wget}\n {e_curl}"
                ) from e_curl
        os.replace(tmp_path, in_path)
    finally:
        # Never leave a half-written scratch file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print(f"✅ Download complete: {in_path}")
else:
    print(f"ℹ️  Already have {in_path}, skipping download")

# 3) Stream-filter to 1419–1421 MHz
# Start from a clean output because matching rows are appended chunk by chunk.
if os.path.exists(out_path):
    os.remove(out_path)

print("➡️  Filtering BL hits for 1419–1421 MHz…")
header_written = False   # the header goes out with the first chunk that has matches
bl_columns = None        # remembered so an empty result can still carry a header
for chunk in pd.read_csv(in_path, chunksize=1_000_000):
    bl_columns = chunk.columns
    mask = chunk["frequency_mhz"].between(1419, 1421)   # inclusive on both ends
    if mask.any():
        chunk.loc[mask].to_csv(
            out_path,
            mode="a",
            index=False,
            header=not header_written
        )
        header_written = True
    print(f"   Processed {len(chunk):,} rows, kept {mask.sum():,}")

# No row fell inside the band: still write a header-only file so that the
# output always exists and can be read back.
if not header_written and bl_columns is not None:
    pd.DataFrame(columns=bl_columns).to_csv(out_path, index=False)

print(f"✅ BL filtering complete: {out_path}")


In [None]:
# Cell 6: Parallel Gaia RUWE → inputs/gaia_flags.csv
import pandas as pd
from astroquery.gaia import Gaia
from multiprocessing import Pool

# Target list written by the confirmed-planets cell
pl = pd.read_csv(
    "inputs/confirmed_planets.csv"
)

def query_gaia(row):
    """Flag one target whose Gaia EDR3 RUWE exceeds 1.4.

    Parameters
    ----------
    row : mapping
        Must provide "pl_name", "ra" and "dec"; the coordinates are
        inserted into an ICRS cone search of radius 0.00027778 deg (1 arcsec).

    Returns
    -------
    dict
        {"pl_name": ..., "gaia_flag": bool}. The flag is True only when a
        source is returned and its ruwe is greater than 1.4; it is False when
        no source is returned, ruwe is missing, or the query fails.
    """
    ra, dec, name = row["ra"], row["dec"], row["pl_name"]
    # ADQL limits the row count with TOP in the SELECT clause; a trailing
    # LIMIT is not part of the ADQL grammar.
    # NOTE(review): without an ORDER BY the single row returned is not
    # guaranteed to be the source nearest to the target.
    q = f"""
        SELECT TOP 1 ruwe
        FROM gaiaedr3.gaia_source
        WHERE CONTAINS(
          POINT('ICRS',ra,dec),
          CIRCLE('ICRS',{ra},{dec},0.00027778)
        )=1
    """
    try:
        res = Gaia.launch_job(q).get_results().to_pandas()
        ruwe = res["ruwe"].iloc[0] if not res.empty else None
        # A NaN ruwe compares False against the threshold, so a missing value
        # never sets the flag.
        return {"pl_name": name, "gaia_flag": bool(ruwe is not None and ruwe > 1.4)}
    except Exception as e:
        # Best-effort: report the failure, then fall back to "not flagged".
        print(f"Gaia failed for {name}: {e}")
        return {"pl_name": name, "gaia_flag": False}

# One task per target row, spread over four worker processes
target_rows = [series for _idx, series in pl.iterrows()]
with Pool(processes=4) as workers:
    flags = workers.map(query_gaia, target_rows)

# Persist the per-planet flags, then report how many came back True
df_gaia = pd.DataFrame(flags)
df_gaia.to_csv("inputs/gaia_flags.csv", index=False)
n_flagged = df_gaia.gaia_flag.sum()
print(f"✅ Gaia RUWE flags complete: {n_flagged} true")


In [None]:
from astroquery.gaia import Gaia
import pandas as pd

# Serial Gaia RUWE lookup, one query per target row.
# Writes inputs/gaia_flags.csv — the same path the parallel RUWE cell writes,
# so running this cell replaces that file; this version also keeps the ruwe
# value itself.
pl = pd.read_csv("inputs/confirmed_planets.csv")
gaia_flags = []

for _, row in pl.iterrows():
    ra,dec = row["ra"], row["dec"]
    # ADQL limits the row count with TOP in the SELECT clause; a trailing
    # LIMIT is not part of the ADQL grammar. The cone radius is 1 arcsec.
    q = f"""
        SELECT TOP 1 ruwe
        FROM gaiaedr3.gaia_source
        WHERE CONTAINS(
          POINT('ICRS',ra,dec),
          CIRCLE('ICRS',{ra},{dec},0.00027778)
        )=1
    """
    try:
        res = Gaia.launch_job(q).get_results().to_pandas()
        ruwe = res["ruwe"].iloc[0] if not res.empty else None
        # Explicit missing-value test instead of truthiness: covers both
        # "no source returned" (None) and a source without ruwe (NaN).
        has_ruwe = bool(pd.notna(ruwe))
        gaia_flags.append({
            "pl_name":  row["pl_name"],
            "gaia_flag": has_ruwe and bool(ruwe > 1.4),
            "ruwe":      float(ruwe) if has_ruwe else None
        })
    except Exception as e:
        # Targets whose query failed are reported and left out of the output.
        print(f"Gaia failed for {row['pl_name']}: {e}")

# Fixed columns so a header is written even if every query failed.
pd.DataFrame(
    gaia_flags, columns=["pl_name", "gaia_flag", "ruwe"]
).to_csv("inputs/gaia_flags.csv", index=False)
print(f"✅ Gaia flags for {len(gaia_flags)} planets")


In [None]:
from astroquery.eso import Eso
import pandas as pd, glob, os

# 1) Retrieve one HARPS dataset (example)
# Takes the first element of the query result and downloads it into
# inputs/harps/.
# NOTE(review): the astroquery.eso documentation describes query_main /
# query_instrument / query_surveys and a `destination` argument on
# retrieve_data — confirm that `query_program` and `output_dir` exist in the
# installed astroquery version.
# NOTE(review): the next step only reads *.csv files from inputs/harps/ —
# confirm that this download actually produces CSV files.
eso = Eso()
# eso.login()  # if you have ESO credentials
prod = eso.query_program("HARPS")[0]
eso.retrieve_data(prod, output_dir="inputs/harps/")

# 2) Stream all CSVs, flag spikes
# A "spike" is any sample whose flux exceeds five times the file's median flux.
os.makedirs("inputs", exist_ok=True)   # the flags file is written under inputs/
flags = []
for f in sorted(glob.glob("inputs/harps/*.csv")):   # sorted for a stable row order
    df = pd.read_csv(f)
    if "flux" not in df.columns:
        # One malformed file should not abort the whole scan.
        print(f"Skipping {f}: no 'flux' column")
        continue
    flux = df["flux"]
    spike_count = int((flux > 5*flux.median()).sum())
    flags.append({"file": os.path.basename(f),
                  "spike_flag": spike_count > 0,
                  "spike_count": spike_count})

# Fixed columns so a header is written even when no file was processed.
pd.DataFrame(
    flags, columns=["file", "spike_flag", "spike_count"]
).to_csv("inputs/spec_flags.csv", index=False)
print(f"✅ HARPS flags for {len(flags)} files")
