<a href="https://colab.research.google.com/github/rbaner21/technosignature-pipeline-v2/blob/main/notebooks/3b_merge_tess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: ensure outputs/ exists & safely load base matrix + TESS flags
import os
import pandas as pd
from pandas.errors import EmptyDataError

# Create the outputs/ directory up front; exist_ok makes this a no-op
# when the directory is already there.
os.makedirs(name="outputs", exist_ok=True)

# Helper to read a CSV or return an empty DataFrame with expected columns
def safe_read(path, cols):
    """Read a CSV, guaranteeing that the expected columns are present.

    Args:
        path: Location of the CSV file to load.
        cols: Column names the caller expects in the result.

    Returns:
        The parsed DataFrame. When the file is missing or has no content
        at all, an empty DataFrame with exactly ``cols`` is returned.
        When the file parses but lacks some of ``cols``, those columns
        are appended (filled with NaN) so that callers can select them
        without hitting a ``KeyError``. Columns present in the file but
        not listed in ``cols`` are kept untouched.
    """
    if not os.path.exists(path):
        return pd.DataFrame(columns=cols)
    try:
        df = pd.read_csv(path)
    except (EmptyDataError, FileNotFoundError):
        # EmptyDataError: the file exists but is zero-length / has no header.
        # FileNotFoundError: the file vanished between the check and the read.
        return pd.DataFrame(columns=cols)

    # Append any expected column the file did not provide, keeping the
    # file's own columns (and their order) first.
    absent = [c for c in cols if c not in df.columns]
    if absent:
        df = df.reindex(columns=[*df.columns, *absent])
    return df

# 1) Base feature matrix, read directly with pandas (no fallback here)
base = pd.read_csv(filepath_or_buffer="outputs/feature_base.csv")

# 2) TESS flags go through safe_read, which is handed the expected
#    column names: pl_name, tic_id, tess_flag
tess_cols = ["pl_name", "tic_id", "tess_flag"]
tess = safe_read(path="inputs/tess_flags.csv", cols=tess_cols)

print(f"✅ Loaded base ({len(base)} rows) + TESS flags ({len(tess)} rows)")


In [None]:
# Cell 2: merge TESS into the base matrix & write final feature matrix
# 1) Collapse the TESS table to one row per tic_id before merging.
#    The table also carries pl_name, so it may hold several rows for the
#    same tic_id; a left merge on tic_id alone would then duplicate base
#    rows. Rows with a missing tic_id are dropped because pandas matches
#    NaN merge keys against each other.
#    NOTE(review): a tic_id is treated as flagged if ANY of its rows is
#    truthy — confirm this is the intended meaning of tess_flag.
tess_by_tic = (
    tess.dropna(subset=["tic_id"])
    .groupby("tic_id", as_index=False)["tess_flag"]
    .any()
)

# 2) Merge on tic_id; validate= asserts the right side is unique per key,
#    so the result has exactly as many rows as base.
fm = base.merge(
    tess_by_tic,
    on="tic_id",
    how="left",
    validate="many_to_one",
)

# 3) Any planets without a TESS flag get False. Going through the nullable
#    "boolean" dtype yields a real bool column instead of an object column
#    mixing True/False/NaN.
fm["tess_flag"] = fm["tess_flag"].astype("boolean").fillna(False).astype(bool)

# 4) Save the complete feature matrix
fm.to_csv("outputs/feature_matrix.csv", index=False)
print(f"✅ Final feature_matrix.csv ({len(fm)} rows)")
