In [None]:
# Cell 1: make sure the inputs/ directory tree exists before anything is
# written into it. Safe to re-run: existing folders are left untouched.
import os

for folder in ("inputs/tess", "inputs/harps"):
    os.makedirs(folder, exist_ok=True)

print("✅ inputs/ folder ready")


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 81, done.[K
remote: Counting objects: 100% (81/81), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 81 (delta 28), reused 23 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (81/81), 28.50 KiB | 2.04 MiB/s, done.
Resolving deltas: 100% (28/28), done.


In [None]:
import pandas as pd

# Synchronous TAP request against exoplanetarchive.ipac.caltech.edu: one row
# per entry of the `pscomppars` table with name, sky position and TIC id,
# returned as CSV so pandas can read it straight from the URL.
url = (
    "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
    "?query=select+pl_name,ra,dec,tic_id+from+pscomppars&format=csv"
)

# Fetch the table, then keep a local copy that the later cells read back.
pl = pd.read_csv(url)
pl.to_csv(
    "inputs/confirmed_planets.csv",
    index=False,
)

print(f"✅ Confirmed planets: {len(pl)} rows")


In [None]:
import os
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.irsa import Irsa

# Cross-match every confirmed planet with the AllWISE source catalogue
# (5 arcsec cone) and record W1/W2 photometry plus an infrared-excess flag.

# Output CSV
out = "inputs/wise_photometry.csv"
if os.path.exists(out):
    os.remove(out)

# Fixed column order so the CSV keeps its header even when nothing matched
# (a header-less file cannot be read back with pd.read_csv).
WISE_COLUMNS = ["pl_name", "ra", "dec", "w1mpro", "w2mpro", "ir_flag"]

# Colour cut (mag) above which a source is flagged as having excess W2 flux.
IR_EXCESS_MAG = 0.5

pl = pd.read_csv("inputs/confirmed_planets.csv")
results = []

for _, row in pl.iterrows():
    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")
    try:
        tbl = Irsa.query_region(
            coord,
            catalog="allwise_p3as_psd",
            radius=5 * u.arcsec,
            # The TAP-based Irsa.query_region expects ONE comma-separated
            # string here; a Python list is interpolated verbatim into the
            # ADQL and the query is rejected.
            columns="ra,dec,w1mpro,w2mpro",
        )
        if len(tbl) > 0:
            hit = tbl[0]
            # Masked (missing) magnitudes become NaN; every comparison with
            # NaN is False, so such rows are simply never flagged.
            w1 = float(hit["w1mpro"])
            w2 = float(hit["w2mpro"])
            results.append({
                "pl_name":  row["pl_name"],
                "ra":        float(hit["ra"]),
                "dec":       float(hit["dec"]),
                "w1mpro":    w1,
                "w2mpro":    w2,
                # Magnitudes decrease as flux increases, so excess emission
                # in the redder W2 band means W1 - W2 > 0. The previous
                # `w2 - w1` test had the sign inverted and selected sources
                # that are *faint* in W2.
                "ir_flag":   bool((w1 - w2) > IR_EXCESS_MAG)
            })
    except Exception as e:
        print(f"IRSA failed for {row['pl_name']}: {e}")

pd.DataFrame(results, columns=WISE_COLUMNS).to_csv(out, index=False)
print(f"✅ AllWISE photometry for {len(results)} planets")


In [None]:
# Cell 4: Query Pan‑STARRS & SDSS via Vizier around each planet (5″ cone)
import pandas as pd
from astroquery.vizier import Vizier
from astropy.coordinates import SkyCoord
import astropy.units as u

# allow up to 10 rows per query
Vizier.ROW_LIMIT = 10

# `Vizier(...)` builds a *new* query object that does not inherit the
# ROW_LIMIT set above, so the limit is also passed explicitly below.
MAX_ROWS = 10
CONE_RADIUS = 5 * u.arcsec

# Fixed column order so the CSVs keep their header even with zero matches.
PS_COLUMNS = ["pl_name", "ra", "dec", "gmag", "rmag", "imag", "ps_flag"]
SD_COLUMNS = ["pl_name", "ra", "dec", "u", "g", "r", "i", "z", "sdss_flag"]

# One query object per catalogue, created once instead of once per planet.
# The position columns must be requested explicitly because only the listed
# columns come back; "+_r" asks VizieR to sort rows by angular distance so
# that row 0 is the nearest source.
ps_service = Vizier(
    columns=["+_r", "RAJ2000", "DEJ2000", "gmag", "rmag", "imag"],
    catalog="II/349/ps1",
    row_limit=MAX_ROWS,
)
# The SDSS DR12 table (V/147) names its magnitudes umag…zmag, not u…z.
sd_service = Vizier(
    columns=["+_r", "RA_ICRS", "DE_ICRS", "umag", "gmag", "rmag", "imag", "zmag"],
    catalog="V/147",
    row_limit=MAX_ROWS,
)


def _closest_match(service, coord):
    """Return the nearest catalogue row within CONE_RADIUS of ``coord``.

    Returns ``None`` when the catalogue has no source inside the cone.
    Network or service errors propagate to the caller.
    """
    tables = service.query_region(coord, radius=CONE_RADIUS)
    if len(tables) == 0 or len(tables[0]) == 0:
        return None
    return tables[0][0]


pl = pd.read_csv("inputs/confirmed_planets.csv")
ps_results = []
sd_results = []

for _, row in pl.iterrows():
    coord = SkyCoord(ra=row["ra"]*u.deg, dec=row["dec"]*u.deg, frame="icrs")

    # Pan-STARRS (PS1) g,r,i
    try:
        hit = _closest_match(ps_service, coord)
        if hit is not None:
            ps_results.append({
                "pl_name": row["pl_name"],
                "ra": float(hit["RAJ2000"]),
                "dec": float(hit["DEJ2000"]),
                "gmag": float(hit["gmag"]),
                "rmag": float(hit["rmag"]),
                "imag": float(hit["imag"]),
                "ps_flag": True
            })
    except Exception as e:
        print(f"PS query failed for {row['pl_name']}: {e}")

    # SDSS u,g,r,i,z (output keys stay u…z so the CSV schema is unchanged)
    try:
        hit = _closest_match(sd_service, coord)
        if hit is not None:
            sd_results.append({
                "pl_name": row["pl_name"],
                "ra": float(hit["RA_ICRS"]),
                "dec": float(hit["DE_ICRS"]),
                "u":  float(hit["umag"]),
                "g":  float(hit["gmag"]),
                "r":  float(hit["rmag"]),
                "i":  float(hit["imag"]),
                "z":  float(hit["zmag"]),
                "sdss_flag": True
            })
    except Exception as e:
        print(f"SDSS query failed for {row['pl_name']}: {e}")

pd.DataFrame(ps_results, columns=PS_COLUMNS).to_csv("inputs/panstarrs.csv", index=False)
pd.DataFrame(sd_results, columns=SD_COLUMNS).to_csv("inputs/sdss.csv",     index=False)
print(f"✅ Pan‑STARRS hits: {len(ps_results)}, SDSS hits: {len(sd_results)}")


In [None]:
import os, pandas as pd
import shutil
import urllib.request


def download_if_missing(url, dest, timeout=60):
    """Download ``url`` to ``dest`` unless ``dest`` already exists.

    The payload is streamed to ``dest + ".part"`` and renamed only after the
    transfer finished, so an interrupted download never leaves a truncated
    file that a later run would mistake for a complete one.

    Returns True when a download happened, False when ``dest`` was already
    present. HTTP and network errors (``urllib.error.URLError``) propagate.
    """
    if os.path.exists(dest):
        return False
    partial = dest + ".part"
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response, \
                open(partial, "wb") as handle:
            shutil.copyfileobj(response, handle)
        os.replace(partial, dest)
    finally:
        # Only still present if the transfer or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)
    return True


def filter_frequency_band(src_csv, dst_csv, low_mhz=1419, high_mhz=1421,
                          chunksize=1_000_000):
    """Copy the rows of ``src_csv`` whose ``frequency_mhz`` lies in
    ``[low_mhz, high_mhz]`` (both ends inclusive) into ``dst_csv``.

    The source is read in chunks of ``chunksize`` rows so it never has to fit
    in memory. ``dst_csv`` is replaced, and is always created with a header
    row even when no row falls inside the band. Returns the number of rows
    kept. Raises ``KeyError`` if the source has no ``frequency_mhz`` column.
    """
    if os.path.exists(dst_csv):
        os.remove(dst_csv)

    header_written = False
    total_kept = 0
    for chunk in pd.read_csv(src_csv, chunksize=chunksize):
        mask = chunk["frequency_mhz"].between(low_mhz, high_mhz)
        kept = int(mask.sum())
        # The first chunk is written even when empty so the header exists.
        if kept or not header_written:
            chunk[mask].to_csv(dst_csv, mode="a", index=False,
                               header=not header_written)
            header_written = True
        total_kept += kept
        print(f"Processed {len(chunk)} rows, kept {kept}")

    if not header_written:
        # Source held a header line only: still emit a header-only output.
        pd.read_csv(src_csv, nrows=0).to_csv(dst_csv, index=False)
    return total_kept


# Download
url = "https://public.breakthroughlisten.org/dataset/hits.csv"
bl_csv = "inputs/bl_hits.csv"

# Stream & keep only 1419–1421 MHz
out = "inputs/bl_filtered.csv"

# A notebook kernel runs cells with __name__ == "__main__", so this executes
# normally in Jupyter while the helpers above stay importable on their own.
if __name__ == "__main__":
    download_if_missing(url, bl_csv)
    filter_frequency_band(bl_csv, out)
    print("✅ Breakthrough Listen filtered")


In [None]:
import re
import numpy as np
import pandas as pd
from astroquery.mast import Observations
import lightkurve as lk
from astropy.timeseries import BoxLeastSquares

# For every TIC id in the planet table: download a TESS light curve, flatten
# it, run a box-least-squares period search and record a transit-like flag.

# Fixed column order so the CSV keeps its header even with zero results.
TESS_COLUMNS = ["tic_id", "tess_flag", "tess_period", "tess_snr"]


def _tic_number(tic):
    """Return the integer TIC number for ``123``, ``123.0`` or ``"TIC 123"``.

    The tic_id column may hold strings carrying a "TIC " prefix, which a
    plain ``int()`` rejects. Raises ``ValueError`` if no digits are found.
    """
    if isinstance(tic, str):
        match = re.search(r"\d+", tic)
        if match is None:
            raise ValueError(f"cannot parse TIC identifier {tic!r}")
        return int(match.group())
    return int(tic)


def _plain_array(values):
    """Strip astropy Time/Quantity wrappers and return a float ndarray."""
    return np.asarray(getattr(values, "value", values), dtype=float)


pl = pd.read_csv("inputs/confirmed_planets.csv")
flags = []

# Trial periods are the same for every target, so build the grid once.
periods = np.linspace(0.5,10,5000)

for tic in pl["tic_id"].dropna().unique():
    try:
        lc = lk.search_lightcurve(f"TIC {_tic_number(tic)}", mission="TESS").download()
        if lc is None:
            # Nothing to download for this target.
            print(f"TIC {tic} error: no TESS light curve found")
            continue
        lc_flat = lc.flatten(window_length=401)

        # Work on plain float arrays and drop non-finite samples: a single
        # NaN would turn the whole periodogram into NaN.
        time = _plain_array(lc_flat.time)
        flux = _plain_array(lc_flat.flux)
        finite = np.isfinite(time) & np.isfinite(flux)
        time, flux = time[finite], flux[finite]
        if time.size == 0:
            print(f"TIC {tic} error: light curve has no finite samples")
            continue

        bls = BoxLeastSquares(time, flux)
        # depth and power are attributes of the object returned by power();
        # the BoxLeastSquares instance itself has no `depth` attribute.
        result = bls.power(periods,0.1)
        power = np.asarray(result.power, dtype=float)
        depth = float(np.max(result.depth))
        # NOTE(review): this ratio (peak BLS power over flux scatter) is kept
        # as originally defined; confirm the snr>20 cut was calibrated on it.
        snr   = float(power.max() / np.std(flux))
        flags.append({
            "tic_id":    tic,
            "tess_flag": bool((depth>0.05) & (snr>20)),
            "tess_period": float(periods[np.argmax(power)]),
            "tess_snr":    snr
        })
    except Exception as e:
        print(f"TIC {tic} error: {e}")

pd.DataFrame(flags, columns=TESS_COLUMNS).to_csv("inputs/tess_flags.csv", index=False)
print(f"✅ TESS flags for {len(flags)} objects")


In [None]:
from astroquery.gaia import Gaia
import pandas as pd

# For every confirmed planet: look up the nearest Gaia EDR3 source within
# 0.00027778 deg (= 1 arcsec) and flag it when its RUWE exceeds 1.4.

# Fixed column order so the CSV keeps its header even with zero results.
GAIA_COLUMNS = ["pl_name", "gaia_flag", "ruwe"]
RUWE_LIMIT = 1.4

pl = pd.read_csv("inputs/confirmed_planets.csv")
gaia_flags = []

for _, row in pl.iterrows():
    ra,dec = row["ra"], row["dec"]
    # ADQL has no LIMIT clause; row capping is spelled `SELECT TOP n`.
    # The separation is selected under an alias because ORDER BY sorts on
    # column names, and sorting makes TOP 1 return the *closest* source
    # instead of an arbitrary one inside the cone.
    q = f"""
        SELECT TOP 1 ruwe,
               DISTANCE(POINT('ICRS',ra,dec), POINT('ICRS',{ra},{dec})) AS ang_sep
        FROM gaiaedr3.gaia_source
        WHERE CONTAINS(
          POINT('ICRS',ra,dec),
          CIRCLE('ICRS',{ra},{dec},0.00027778)
        )=1
        ORDER BY ang_sep ASC
    """
    try:
        res = Gaia.launch_job(q).get_results().to_pandas()
        ruwe = res["ruwe"].iloc[0] if not res.empty else None
        # Explicit null test: "no match" and "match without a RUWE value"
        # both end up as an empty ruwe cell and a False flag.
        has_ruwe = ruwe is not None and pd.notna(ruwe)
        gaia_flags.append({
            "pl_name":  row["pl_name"],
            "gaia_flag": bool(has_ruwe and ruwe > RUWE_LIMIT),
            "ruwe":      float(ruwe) if has_ruwe else None
        })
    except Exception as e:
        print(f"Gaia failed for {row['pl_name']}: {e}")

pd.DataFrame(gaia_flags, columns=GAIA_COLUMNS).to_csv("inputs/gaia_flags.csv", index=False)
print(f"✅ Gaia flags for {len(gaia_flags)} planets")


In [None]:
from astroquery.eso import Eso
import pandas as pd, glob, os

# Fixed column order so the CSV keeps its header even with zero files.
SPEC_COLUMNS = ["file", "spike_flag", "spike_count"]

# 1) Retrieve one HARPS dataset (example)
eso = Eso()
# eso.login()  # if you have ESO credentials
try:
    # Eso has no `query_program`; instrument-specific searches go through
    # `query_instrument`, and `retrieve_data` takes dataset identifiers plus
    # a `destination` directory (there is no `output_dir` keyword).
    harps_table = eso.query_instrument("harps")
    if harps_table is None or len(harps_table) == 0:
        print("HARPS retrieval failed: query returned no datasets")
    else:
        # The identifier column is named differently across astroquery
        # releases — NOTE(review): confirm against the installed version.
        id_col = "DP.ID" if "DP.ID" in harps_table.colnames else "dp_id"
        eso.retrieve_data([harps_table[id_col][0]], destination="inputs/harps/")
except Exception as e:
    # Best effort, like the other catalogue cells: report the failure and
    # still scan whatever is already present in inputs/harps/.
    print(f"HARPS retrieval failed: {e}")

# NOTE(review): ESO archive products are normally FITS files, so the scan
# below presumably relies on spectra exported to CSV by a separate step —
# confirm where that conversion is meant to happen.

# 2) Stream all CSVs, flag spikes
flags = []
for f in sorted(glob.glob("inputs/harps/*.csv")):  # sorted => stable row order
    try:
        df = pd.read_csv(f)
        # A sample counts as a spike when it exceeds 5x the median flux.
        spike_count = int((df["flux"] > 5*df["flux"].median()).sum())
    except Exception as e:
        # One unreadable file or missing `flux` column must not abort the rest.
        print(f"Skipping {os.path.basename(f)}: {e}")
        continue
    flags.append({"file": os.path.basename(f),
                  "spike_flag": bool(spike_count>0),
                  "spike_count": spike_count})
pd.DataFrame(flags, columns=SPEC_COLUMNS).to_csv("inputs/spec_flags.csv", index=False)
print(f"✅ HARPS flags for {len(flags)} files")
