In [None]:
# Cell 1: ensure working folders exist
import os

# Working folders for the notebook, parents listed before their children:
# raw & intermediate inputs first, then the final outputs.
workspace_dirs = (
    "inputs",
    "inputs/tess",
    "inputs/harps",
    "outputs",
    "outputs/reports",
)

# exist_ok=True lets this cell be re-run without failing on existing folders.
for folder in workspace_dirs:
    os.makedirs(folder, exist_ok=True)


Cloning into 'technosignature-pipeline-v2'...
remote: Enumerating objects: 93, done.
remote: Counting objects: 100% (93/93), done.
remote: Compressing objects: 100% (88/88), done.
remote: Total 93 (delta 33), reused 23 (delta 1), pack-reused 0 (from 0)
Receiving objects: 100% (93/93), 34.10 KiB | 529.00 KiB/s, done.
Resolving deltas: 100% (33/33), done.


In [None]:
# Cell 2: compute techno_score & pick top 20
import pandas as pd

# 1) Load the merged feature matrix
fm = pd.read_csv("outputs/feature_matrix.csv")

# 2) Define exactly which flags to sum
flag_cols = [
    "ir_flag",     # IR excess (WISE)
    "ps_flag",     # Pan-STARRS hit
    "sdss_flag",   # SDSS hit
    "radio_flag",  # Radio (Breakthrough Listen)
    "gaia_flag",   # Gaia RUWE
    "spec_flag",   # Spectral spikes (HARPS)
    "tess_flag",   # TESS transit
]

# 3) Sanity check: every column used below must exist *before* anything is
#    written, so a bad input never leaves a half-finished output behind.
missing = [f for f in flag_cols if f not in fm.columns]
if missing:
    raise RuntimeError(f"Missing flags in feature_matrix.csv: {missing}")
if "pl_name" not in fm.columns:
    raise RuntimeError("Missing 'pl_name' column in feature_matrix.csv")

# 4) Compute techno_score and select top 20.
#    A missing flag value (NaN) is counted as 0, i.e. "not flagged";
#    astype(int) alone raises on NaN. Only the score uses the filled values:
#    the flag columns in `fm` (and in the saved CSV) are left as loaded.
fm["techno_score"] = fm[flag_cols].fillna(0).astype(int).sum(axis=1)
top20 = fm.nlargest(20, "techno_score")

# 5) Save results
top20.to_csv("outputs/top_candidates.csv", index=False)

print("✅ Top 20 technosignature candidates:")
print(top20[["pl_name", "techno_score"]])
