In [1]:
# 02_download_clean — Target A (PDCSAP-first, TESSCut-FFI fallback) — robust & batchable
import json, warnings, pathlib, re
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# --- Config (single-target defaults) ---
TARGET_TOI = "TOI 1801.01"
TARGET_TIC = 119584412
QUALITY    = 175
CUTOUT_SZ  = 15
WIN_DAYS   = 1.0

# --- Folders ---
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("results").mkdir(exist_ok=True)
pathlib.Path("data_raw_fresh").mkdir(exist_ok=True)

# --- Helpers ---
def window_len_for(t_days, window_days=1.0):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt<=0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len//2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m  = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def custom_flatten(lc, window_days=1.0, polyorder=2):
    """Return a *new* LightCurve with flattened flux (no .replace() calls)."""
    t = lc.time.value
    f = lc.flux.value.astype(float)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f/np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    # carry flux_err if present
    try:
        ferr = lc.flux_err.value if getattr(lc, "flux_err", None) is not None else None
    except Exception:
        ferr = None
    from lightkurve import LightCurve
    return LightCurve(time=lc.time, flux=flat, flux_err=ferr, meta=dict(getattr(lc, "meta", {})))

def approx_cdpp_ppm(lc, hours=1.0):
    t = lc.time.value; f = lc.flux.value
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr<=0: return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def get_header_sector(header):
    # Try common sector keys; then regex fallback
    for k in ("SECTOR","sector","SEC1"):
        try:
            v = header.get(k)
            if v is not None:
                return int(v)
        except Exception:
            pass
    txt = " ".join([f"{k}={header.get(k)}" for k in header.keys()])
    m = re.search(r"[Ss]ector[^0-9]*([0-9]{1,3})", txt)
    return int(m.group(1)) if m else None

# --- Lightkurve ---
import lightkurve as lk
from lightkurve import LightCurveCollection

def run_download_clean(target_toi: str, target_tic: int, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS):
    TARGET = f"TIC {target_tic}"
    out_fig  = f"figures/TIC{target_tic}_download_clean.png"
    out_json = f"results/TIC{target_tic}_download_clean_summary.json"

    print(f"Target: {target_toi} ({TARGET})")

    # -------------------------
    # 1) Download SPOC PDCSAP first; read sector from files (robust to missing 'sector' column)
    # -------------------------
    pdcsap_list, got_spoc_sectors = [], set()
    sr_spoc = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

    if len(sr_spoc) == 0:
        print("No SPOC PDCSAP products found.")
    else:
        for i in range(len(sr_spoc)):
            try:
                lcf = sr_spoc[i].download(download_dir="data_raw_fresh")
                lc  = lcf.PDCSAP_FLUX.remove_nans().normalize()
                # sector from object or header
                s = getattr(lcf, "sector", None)
                if s is None:
                    try:
                        hdr = lcf.header() if hasattr(lcf, "header") else lcf.get_header()
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                lc.meta["source"] = "PDCSAP"; lc.meta["sector"] = int(s)
                pdcsap_list.append(lc)
                if s != -1: got_spoc_sectors.add(int(s))
                print(f" PDCSAP S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f" PDCSAP entry {i}: failed -> {e}")

    # -------------------------
    # 2) Query TESSCut and add FFI for sectors not covered by PDCSAP
    # -------------------------
    ffi_list = []
    try:
        sr_cut = lk.search_tesscut(TARGET)
    except Exception as e:
        sr_cut = []
        print("TESSCut query failed:", e)

    if len(sr_cut) == 0:
        print("No TESSCut entries found.")
    else:
        for i in range(len(sr_cut)):
            try:
                tpf = sr_cut[i].download(cutout_size=cutout_sz, download_dir="data_raw_fresh")
                # sector from object or header
                s = getattr(tpf, "sector", None)
                if s is None:
                    try:
                        hdr = tpf.hdu[0].header
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                # skip if PDCSAP already covers
                if (s != -1) and (int(s) in got_spoc_sectors):
                    print(f"  FFI S{int(s)}: skip (already have PDCSAP)")
                    continue
                # aperture photometry
                mask = tpf.create_threshold_mask(threshold=3, reference_pixel=None)
                if not np.any(mask):
                    m = np.zeros_like(mask, dtype=bool)
                    yy, xx = np.unravel_index(np.nanargmax(np.nanmedian(tpf.flux.value, axis=0)), mask.shape)
                    m[yy, xx] = True
                    mask = m
                lc = tpf.to_lightcurve(aperture_mask=mask).remove_nans().normalize()
                lc.meta["source"] = "FFI"; lc.meta["sector"] = int(s)
                ffi_list.append(lc)
                print(f"  FFI S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f"  FFI entry {i}: failed -> {e}")

    if not (pdcsap_list or ffi_list):
        raise RuntimeError("No light curves obtained from SPOC or TESSCut.")

    # -------------------------
    # 3) Stitch, flatten, QC
    # -------------------------
    all_lcs_raw   = pdcsap_list + ffi_list
    stitched_raw  = LightCurveCollection(all_lcs_raw).stitch().remove_nans()
    stitched_flat = custom_flatten(stitched_raw, window_days=win_days)

    def rms_ppm(x): return float(np.nanstd(x)*1e6)
    qc = {
        "target_toi": target_toi,
        "target_tic": target_tic,
        "n_sectors_pdcsap": len(pdcsap_list),
        "n_sectors_ffi": len(ffi_list),
        "sectors_pdcsap": [int(lc.meta.get("sector", -1)) for lc in pdcsap_list],
        "sectors_ffi": [int(lc.meta.get("sector", -1)) for lc in ffi_list],
        "n_points_raw": int(len(stitched_raw)),
        "n_points_flat": int(len(stitched_flat)),
        "rms_raw_ppm": rms_ppm(stitched_raw.flux.value),
        "rms_flat_ppm": rms_ppm(stitched_flat.flux.value),
        "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
        "flatten_window_days": float(win_days),
        "notes": "PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing."
    }

    # -------------------------
    # 4) Figure & JSON
    # -------------------------
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,6), dpi=140, sharex=True)
    stitched_raw.plot(ax=ax1, marker=".", lw=0, alpha=0.35)
    ax1.set_ylabel("Flux (norm)")
    ax1.set_title(f"{target_toi} (TIC {target_tic}) — Stitched RAW")
    stitched_flat.plot(ax=ax2, marker=".", lw=0, alpha=0.45, label="flattened")
    ax2.set_xlabel("Time [BTJD]")
    ax2.set_ylabel("Flux (norm)")
    ax2.set_title(f"{target_toi} — Stitched FLAT (window={win_days:.2f} d)")
    ax2.legend(loc="best")
    fig.tight_layout()
    fig.savefig(out_fig); plt.close(fig)

    with open(out_json, "w") as f:
        json.dump(qc, f, indent=2)

    print("\n== DONE ==")
    print(f"Saved figure: {out_fig}")
    print(f"Saved JSON  : {out_json}")
    print("Summary:", qc)

    return qc, out_fig, out_json

# ---------- Run on Target A (deliverable) ----------
_ = run_download_clean(TARGET_TOI, TARGET_TIC, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)

# ---------- OPTIONAL: batch-run first 4 priority targets ----------
# Set RUN_BATCH=True to run on the first 4 rows in results/priority_targets.csv
RUN_BATCH = False
if RUN_BATCH:
    import pandas as pd
    dfp = pd.read_csv("results/priority_targets.csv")
    # Expect columns: TIC_ID_norm, toi (string/number). Adjust if your headers differ.
    for _, row in dfp.head(4).iterrows():
        tic = int(row["TIC_ID_norm"])
        toi = f"TOI {row['toi']}"
        try:
            run_download_clean(toi, tic, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)
        except Exception as e:
            print(f"[Batch] {toi} (TIC {tic}) failed: {e}")

Target: TOI 1801.01 (TIC 119584412)
 PDCSAP S22: ok (N=16102)
 PDCSAP S49: ok (N=13272)
  FFI S22: skip (already have PDCSAP)
  FFI S49: skip (already have PDCSAP)

== DONE ==
Saved figure: figures/TIC119584412_download_clean.png
Saved JSON  : results/TIC119584412_download_clean_summary.json
Summary: {'target_toi': 'TOI 1801.01', 'target_tic': 119584412, 'n_sectors_pdcsap': 2, 'n_sectors_ffi': 0, 'sectors_pdcsap': [22, 49], 'sectors_ffi': [], 'n_points_raw': 29374, 'n_points_flat': 29374, 'rms_raw_ppm': 1359.3999901786447, 'rms_flat_ppm': 1099.746714683546, 'cdpp1h_flat_ppm': 251.16073602817906, 'flatten_window_days': 1.0, 'notes': 'PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing.'}


In [2]:
# 02_download_clean — Target A (PDCSAP-first, TESSCut-FFI fallback) — robust & batchable
import json, warnings, pathlib, re
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# --- Config (single-target defaults) ---
TARGET_TOI= "TOI 260.01"
TARGET_TIC= 37749396
QUALITY    = 175
CUTOUT_SZ  = 15
WIN_DAYS   = 1.0

# --- Folders ---
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("results").mkdir(exist_ok=True)
pathlib.Path("data_raw_fresh").mkdir(exist_ok=True)

# --- Helpers ---
def window_len_for(t_days, window_days=1.0):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt<=0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len//2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m  = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def custom_flatten(lc, window_days=1.0, polyorder=2):
    """Return a *new* LightCurve with flattened flux (no .replace() calls)."""
    t = lc.time.value
    f = lc.flux.value.astype(float)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f/np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    # carry flux_err if present
    try:
        ferr = lc.flux_err.value if getattr(lc, "flux_err", None) is not None else None
    except Exception:
        ferr = None
    from lightkurve import LightCurve
    return LightCurve(time=lc.time, flux=flat, flux_err=ferr, meta=dict(getattr(lc, "meta", {})))

def approx_cdpp_ppm(lc, hours=1.0):
    t = lc.time.value; f = lc.flux.value
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr<=0: return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def get_header_sector(header):
    # Try common sector keys; then regex fallback
    for k in ("SECTOR","sector","SEC1"):
        try:
            v = header.get(k)
            if v is not None:
                return int(v)
        except Exception:
            pass
    txt = " ".join([f"{k}={header.get(k)}" for k in header.keys()])
    m = re.search(r"[Ss]ector[^0-9]*([0-9]{1,3})", txt)
    return int(m.group(1)) if m else None

# --- Lightkurve ---
import lightkurve as lk
from lightkurve import LightCurveCollection

def run_download_clean(target_toi: str, target_tic: int, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS):
    TARGET = f"TIC {target_tic}"
    out_fig  = f"figures/TIC{target_tic}_download_clean.png"
    out_json = f"results/TIC{target_tic}_download_clean_summary.json"

    print(f"Target: {target_toi} ({TARGET})")

    # -------------------------
    # 1) Download SPOC PDCSAP first; read sector from files (robust to missing 'sector' column)
    # -------------------------
    pdcsap_list, got_spoc_sectors = [], set()
    sr_spoc = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

    if len(sr_spoc) == 0:
        print("No SPOC PDCSAP products found.")
    else:
        for i in range(len(sr_spoc)):
            try:
                lcf = sr_spoc[i].download(download_dir="data_raw_fresh")
                lc  = lcf.PDCSAP_FLUX.remove_nans().normalize()
                # sector from object or header
                s = getattr(lcf, "sector", None)
                if s is None:
                    try:
                        hdr = lcf.header() if hasattr(lcf, "header") else lcf.get_header()
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                lc.meta["source"] = "PDCSAP"; lc.meta["sector"] = int(s)
                pdcsap_list.append(lc)
                if s != -1: got_spoc_sectors.add(int(s))
                print(f" PDCSAP S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f" PDCSAP entry {i}: failed -> {e}")

    # -------------------------
    # 2) Query TESSCut and add FFI for sectors not covered by PDCSAP
    # -------------------------
    ffi_list = []
    try:
        sr_cut = lk.search_tesscut(TARGET)
    except Exception as e:
        sr_cut = []
        print("TESSCut query failed:", e)

    if len(sr_cut) == 0:
        print("No TESSCut entries found.")
    else:
        for i in range(len(sr_cut)):
            try:
                tpf = sr_cut[i].download(cutout_size=cutout_sz, download_dir="data_raw_fresh")
                # sector from object or header
                s = getattr(tpf, "sector", None)
                if s is None:
                    try:
                        hdr = tpf.hdu[0].header
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                # skip if PDCSAP already covers
                if (s != -1) and (int(s) in got_spoc_sectors):
                    print(f"  FFI S{int(s)}: skip (already have PDCSAP)")
                    continue
                # aperture photometry
                mask = tpf.create_threshold_mask(threshold=3, reference_pixel=None)
                if not np.any(mask):
                    m = np.zeros_like(mask, dtype=bool)
                    yy, xx = np.unravel_index(np.nanargmax(np.nanmedian(tpf.flux.value, axis=0)), mask.shape)
                    m[yy, xx] = True
                    mask = m
                lc = tpf.to_lightcurve(aperture_mask=mask).remove_nans().normalize()
                lc.meta["source"] = "FFI"; lc.meta["sector"] = int(s)
                ffi_list.append(lc)
                print(f"  FFI S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f"  FFI entry {i}: failed -> {e}")

    if not (pdcsap_list or ffi_list):
        raise RuntimeError("No light curves obtained from SPOC or TESSCut.")

    # -------------------------
    # 3) Stitch, flatten, QC
    # -------------------------
    all_lcs_raw   = pdcsap_list + ffi_list
    stitched_raw  = LightCurveCollection(all_lcs_raw).stitch().remove_nans()
    stitched_flat = custom_flatten(stitched_raw, window_days=win_days)

    def rms_ppm(x): return float(np.nanstd(x)*1e6)
    qc = {
        "target_toi": target_toi,
        "target_tic": target_tic,
        "n_sectors_pdcsap": len(pdcsap_list),
        "n_sectors_ffi": len(ffi_list),
        "sectors_pdcsap": [int(lc.meta.get("sector", -1)) for lc in pdcsap_list],
        "sectors_ffi": [int(lc.meta.get("sector", -1)) for lc in ffi_list],
        "n_points_raw": int(len(stitched_raw)),
        "n_points_flat": int(len(stitched_flat)),
        "rms_raw_ppm": rms_ppm(stitched_raw.flux.value),
        "rms_flat_ppm": rms_ppm(stitched_flat.flux.value),
        "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
        "flatten_window_days": float(win_days),
        "notes": "PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing."
    }

    # -------------------------
    # 4) Figure & JSON
    # -------------------------
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,6), dpi=140, sharex=True)
    stitched_raw.plot(ax=ax1, marker=".", lw=0, alpha=0.35)
    ax1.set_ylabel("Flux (norm)")
    ax1.set_title(f"{target_toi} (TIC {target_tic}) — Stitched RAW")
    stitched_flat.plot(ax=ax2, marker=".", lw=0, alpha=0.45, label="flattened")
    ax2.set_xlabel("Time [BTJD]")
    ax2.set_ylabel("Flux (norm)")
    ax2.set_title(f"{target_toi} — Stitched FLAT (window={win_days:.2f} d)")
    ax2.legend(loc="best")
    fig.tight_layout()
    fig.savefig(out_fig); plt.close(fig)

    with open(out_json, "w") as f:
        json.dump(qc, f, indent=2)

    print("\n== DONE ==")
    print(f"Saved figure: {out_fig}")
    print(f"Saved JSON  : {out_json}")
    print("Summary:", qc)

    return qc, out_fig, out_json

# ---------- Run on Target A (deliverable) ----------
_ = run_download_clean(TARGET_TOI, TARGET_TIC, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)

# ---------- OPTIONAL: batch-run first 4 priority targets ----------
# Set RUN_BATCH=True to run on the first 4 rows in results/priority_targets.csv
RUN_BATCH = False
if RUN_BATCH:
    import pandas as pd
    dfp = pd.read_csv("results/priority_targets.csv")
    # Expect columns: TIC_ID_norm, toi (string/number). Adjust if your headers differ.
    for _, row in dfp.head(4).iterrows():
        tic = int(row["TIC_ID_norm"])
        toi = f"TOI {row['toi']}"
        try:
            run_download_clean(toi, tic, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)
        except Exception as e:
            print(f"[Batch] {toi} (TIC {tic}) failed: {e}")

Target: TOI 260.01 (TIC 37749396)




 PDCSAP S3: ok (N=12978)
 PDCSAP S42: ok (N=11473)
 PDCSAP S70: ok (N=86180)
 PDCSAP S70: ok (N=14424)
  FFI S3: skip (already have PDCSAP)
  FFI S42: skip (already have PDCSAP)
  FFI S70: skip (already have PDCSAP)

== DONE ==
Saved figure: figures/TIC37749396_download_clean.png
Saved JSON  : results/TIC37749396_download_clean_summary.json
Summary: {'target_toi': 'TOI 260.01', 'target_tic': 37749396, 'n_sectors_pdcsap': 4, 'n_sectors_ffi': 0, 'sectors_pdcsap': [3, 42, 70, 70], 'sectors_ffi': [], 'n_points_raw': 125055, 'n_points_flat': 125055, 'rms_raw_ppm': 1043.9811740070581, 'rms_flat_ppm': 1035.729219446299, 'cdpp1h_flat_ppm': 112.01310543832174, 'flatten_window_days': 1.0, 'notes': 'PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing.'}


In [3]:
# 02_download_clean — Target A (PDCSAP-first, TESSCut-FFI fallback) — robust & batchable
import json, warnings, pathlib, re
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# --- Config (single-target defaults) ---
TARGET_TOI="TOI 550.02"
TARGET_TIC=311183180
QUALITY    = 175
CUTOUT_SZ  = 15
WIN_DAYS   = 1.0

# --- Folders ---
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("results").mkdir(exist_ok=True)
pathlib.Path("data_raw_fresh").mkdir(exist_ok=True)

# --- Helpers ---
def window_len_for(t_days, window_days=1.0):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt<=0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len//2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m  = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def custom_flatten(lc, window_days=1.0, polyorder=2):
    """Return a *new* LightCurve with flattened flux (no .replace() calls)."""
    t = lc.time.value
    f = lc.flux.value.astype(float)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f/np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    # carry flux_err if present
    try:
        ferr = lc.flux_err.value if getattr(lc, "flux_err", None) is not None else None
    except Exception:
        ferr = None
    from lightkurve import LightCurve
    return LightCurve(time=lc.time, flux=flat, flux_err=ferr, meta=dict(getattr(lc, "meta", {})))

def approx_cdpp_ppm(lc, hours=1.0):
    t = lc.time.value; f = lc.flux.value
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr<=0: return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def get_header_sector(header):
    # Try common sector keys; then regex fallback
    for k in ("SECTOR","sector","SEC1"):
        try:
            v = header.get(k)
            if v is not None:
                return int(v)
        except Exception:
            pass
    txt = " ".join([f"{k}={header.get(k)}" for k in header.keys()])
    m = re.search(r"[Ss]ector[^0-9]*([0-9]{1,3})", txt)
    return int(m.group(1)) if m else None

# --- Lightkurve ---
import lightkurve as lk
from lightkurve import LightCurveCollection

def run_download_clean(target_toi: str, target_tic: int, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS):
    TARGET = f"TIC {target_tic}"
    out_fig  = f"figures/TIC{target_tic}_download_clean.png"
    out_json = f"results/TIC{target_tic}_download_clean_summary.json"

    print(f"Target: {target_toi} ({TARGET})")

    # -------------------------
    # 1) Download SPOC PDCSAP first; read sector from files (robust to missing 'sector' column)
    # -------------------------
    pdcsap_list, got_spoc_sectors = [], set()
    sr_spoc = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

    if len(sr_spoc) == 0:
        print("No SPOC PDCSAP products found.")
    else:
        for i in range(len(sr_spoc)):
            try:
                lcf = sr_spoc[i].download(download_dir="data_raw_fresh")
                lc  = lcf.PDCSAP_FLUX.remove_nans().normalize()
                # sector from object or header
                s = getattr(lcf, "sector", None)
                if s is None:
                    try:
                        hdr = lcf.header() if hasattr(lcf, "header") else lcf.get_header()
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                lc.meta["source"] = "PDCSAP"; lc.meta["sector"] = int(s)
                pdcsap_list.append(lc)
                if s != -1: got_spoc_sectors.add(int(s))
                print(f" PDCSAP S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f" PDCSAP entry {i}: failed -> {e}")

    # -------------------------
    # 2) Query TESSCut and add FFI for sectors not covered by PDCSAP
    # -------------------------
    ffi_list = []
    try:
        sr_cut = lk.search_tesscut(TARGET)
    except Exception as e:
        sr_cut = []
        print("TESSCut query failed:", e)

    if len(sr_cut) == 0:
        print("No TESSCut entries found.")
    else:
        for i in range(len(sr_cut)):
            try:
                tpf = sr_cut[i].download(cutout_size=cutout_sz, download_dir="data_raw_fresh")
                # sector from object or header
                s = getattr(tpf, "sector", None)
                if s is None:
                    try:
                        hdr = tpf.hdu[0].header
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                # skip if PDCSAP already covers
                if (s != -1) and (int(s) in got_spoc_sectors):
                    print(f"  FFI S{int(s)}: skip (already have PDCSAP)")
                    continue
                # aperture photometry
                mask = tpf.create_threshold_mask(threshold=3, reference_pixel=None)
                if not np.any(mask):
                    m = np.zeros_like(mask, dtype=bool)
                    yy, xx = np.unravel_index(np.nanargmax(np.nanmedian(tpf.flux.value, axis=0)), mask.shape)
                    m[yy, xx] = True
                    mask = m
                lc = tpf.to_lightcurve(aperture_mask=mask).remove_nans().normalize()
                lc.meta["source"] = "FFI"; lc.meta["sector"] = int(s)
                ffi_list.append(lc)
                print(f"  FFI S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f"  FFI entry {i}: failed -> {e}")

    if not (pdcsap_list or ffi_list):
        raise RuntimeError("No light curves obtained from SPOC or TESSCut.")

    # -------------------------
    # 3) Stitch, flatten, QC
    # -------------------------
    all_lcs_raw   = pdcsap_list + ffi_list
    stitched_raw  = LightCurveCollection(all_lcs_raw).stitch().remove_nans()
    stitched_flat = custom_flatten(stitched_raw, window_days=win_days)

    def rms_ppm(x): return float(np.nanstd(x)*1e6)
    qc = {
        "target_toi": target_toi,
        "target_tic": target_tic,
        "n_sectors_pdcsap": len(pdcsap_list),
        "n_sectors_ffi": len(ffi_list),
        "sectors_pdcsap": [int(lc.meta.get("sector", -1)) for lc in pdcsap_list],
        "sectors_ffi": [int(lc.meta.get("sector", -1)) for lc in ffi_list],
        "n_points_raw": int(len(stitched_raw)),
        "n_points_flat": int(len(stitched_flat)),
        "rms_raw_ppm": rms_ppm(stitched_raw.flux.value),
        "rms_flat_ppm": rms_ppm(stitched_flat.flux.value),
        "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
        "flatten_window_days": float(win_days),
        "notes": "PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing."
    }

    # -------------------------
    # 4) Figure & JSON
    # -------------------------
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,6), dpi=140, sharex=True)
    stitched_raw.plot(ax=ax1, marker=".", lw=0, alpha=0.35)
    ax1.set_ylabel("Flux (norm)")
    ax1.set_title(f"{target_toi} (TIC {target_tic}) — Stitched RAW")
    stitched_flat.plot(ax=ax2, marker=".", lw=0, alpha=0.45, label="flattened")
    ax2.set_xlabel("Time [BTJD]")
    ax2.set_ylabel("Flux (norm)")
    ax2.set_title(f"{target_toi} — Stitched FLAT (window={win_days:.2f} d)")
    ax2.legend(loc="best")
    fig.tight_layout()
    fig.savefig(out_fig); plt.close(fig)

    with open(out_json, "w") as f:
        json.dump(qc, f, indent=2)

    print("\n== DONE ==")
    print(f"Saved figure: {out_fig}")
    print(f"Saved JSON  : {out_json}")
    print("Summary:", qc)

    return qc, out_fig, out_json

# ---------- Run on Target A (deliverable) ----------
_ = run_download_clean(TARGET_TOI, TARGET_TIC, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)

# ---------- OPTIONAL: batch-run first 4 priority targets ----------
# Set RUN_BATCH=True to run on the first 4 rows in results/priority_targets.csv
RUN_BATCH = False
if RUN_BATCH:
    import pandas as pd
    dfp = pd.read_csv("results/priority_targets.csv")
    # Expect columns: TIC_ID_norm, toi (string/number). Adjust if your headers differ.
    for _, row in dfp.head(4).iterrows():
        tic = int(row["TIC_ID_norm"])
        toi = f"TOI {row['toi']}"
        try:
            run_download_clean(toi, tic, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)
        except Exception as e:
            print(f"[Batch] {toi} (TIC {tic}) failed: {e}")

Target: TOI 550.02 (TIC 311183180)
 PDCSAP S5: ok (N=17286)
 PDCSAP S31: ok (N=16250)
  FFI S5: skip (already have PDCSAP)
  FFI S31: skip (already have PDCSAP)

== DONE ==
Saved figure: figures/TIC311183180_download_clean.png
Saved JSON  : results/TIC311183180_download_clean_summary.json
Summary: {'target_toi': 'TOI 550.02', 'target_tic': 311183180, 'n_sectors_pdcsap': 2, 'n_sectors_ffi': 0, 'sectors_pdcsap': [5, 31], 'sectors_ffi': [], 'n_points_raw': 33536, 'n_points_flat': 33536, 'rms_raw_ppm': 3489.643335342407, 'rms_flat_ppm': 2277.229196109564, 'cdpp1h_flat_ppm': 2077.4277749925795, 'flatten_window_days': 1.0, 'notes': 'PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing.'}


In [4]:
# 02_download_clean — Target A (PDCSAP-first, TESSCut-FFI fallback) — robust & batchable
import json, warnings, pathlib, re
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# --- Config (single-target defaults) ---
TARGET_TOI="TOI 139.01"
TARGET_TIC=62483237
QUALITY    = 175
CUTOUT_SZ  = 15
WIN_DAYS   = 1.0

# --- Folders ---
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("results").mkdir(exist_ok=True)
pathlib.Path("data_raw_fresh").mkdir(exist_ok=True)

# --- Helpers ---
def window_len_for(t_days, window_days=1.0):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt<=0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len//2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m  = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def custom_flatten(lc, window_days=1.0, polyorder=2):
    """Return a *new* LightCurve with flattened flux (no .replace() calls)."""
    t = lc.time.value
    f = lc.flux.value.astype(float)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f/np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    # carry flux_err if present
    try:
        ferr = lc.flux_err.value if getattr(lc, "flux_err", None) is not None else None
    except Exception:
        ferr = None
    from lightkurve import LightCurve
    return LightCurve(time=lc.time, flux=flat, flux_err=ferr, meta=dict(getattr(lc, "meta", {})))

def approx_cdpp_ppm(lc, hours=1.0):
    t = lc.time.value; f = lc.flux.value
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr<=0: return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def get_header_sector(header):
    # Try common sector keys; then regex fallback
    for k in ("SECTOR","sector","SEC1"):
        try:
            v = header.get(k)
            if v is not None:
                return int(v)
        except Exception:
            pass
    txt = " ".join([f"{k}={header.get(k)}" for k in header.keys()])
    m = re.search(r"[Ss]ector[^0-9]*([0-9]{1,3})", txt)
    return int(m.group(1)) if m else None

# --- Lightkurve ---
import lightkurve as lk
from lightkurve import LightCurveCollection

def run_download_clean(target_toi: str, target_tic: int, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS):
    TARGET = f"TIC {target_tic}"
    out_fig  = f"figures/TIC{target_tic}_download_clean.png"
    out_json = f"results/TIC{target_tic}_download_clean_summary.json"

    print(f"Target: {target_toi} ({TARGET})")

    # -------------------------
    # 1) Download SPOC PDCSAP first; read sector from files (robust to missing 'sector' column)
    # -------------------------
    pdcsap_list, got_spoc_sectors = [], set()
    sr_spoc = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

    if len(sr_spoc) == 0:
        print("No SPOC PDCSAP products found.")
    else:
        for i in range(len(sr_spoc)):
            try:
                lcf = sr_spoc[i].download(download_dir="data_raw_fresh")
                lc  = lcf.PDCSAP_FLUX.remove_nans().normalize()
                # sector from object or header
                s = getattr(lcf, "sector", None)
                if s is None:
                    try:
                        hdr = lcf.header() if hasattr(lcf, "header") else lcf.get_header()
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                lc.meta["source"] = "PDCSAP"; lc.meta["sector"] = int(s)
                pdcsap_list.append(lc)
                if s != -1: got_spoc_sectors.add(int(s))
                print(f" PDCSAP S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f" PDCSAP entry {i}: failed -> {e}")

    # -------------------------
    # 2) Query TESSCut and add FFI for sectors not covered by PDCSAP
    # -------------------------
    ffi_list = []
    try:
        sr_cut = lk.search_tesscut(TARGET)
    except Exception as e:
        sr_cut = []
        print("TESSCut query failed:", e)

    if len(sr_cut) == 0:
        print("No TESSCut entries found.")
    else:
        for i in range(len(sr_cut)):
            try:
                tpf = sr_cut[i].download(cutout_size=cutout_sz, download_dir="data_raw_fresh")
                # sector from object or header
                s = getattr(tpf, "sector", None)
                if s is None:
                    try:
                        hdr = tpf.hdu[0].header
                    except Exception:
                        hdr = None
                    if hdr is not None:
                        s = get_header_sector(hdr)
                if s is None: s = -1
                # skip if PDCSAP already covers
                if (s != -1) and (int(s) in got_spoc_sectors):
                    print(f"  FFI S{int(s)}: skip (already have PDCSAP)")
                    continue
                # aperture photometry
                mask = tpf.create_threshold_mask(threshold=3, reference_pixel=None)
                if not np.any(mask):
                    m = np.zeros_like(mask, dtype=bool)
                    yy, xx = np.unravel_index(np.nanargmax(np.nanmedian(tpf.flux.value, axis=0)), mask.shape)
                    m[yy, xx] = True
                    mask = m
                lc = tpf.to_lightcurve(aperture_mask=mask).remove_nans().normalize()
                lc.meta["source"] = "FFI"; lc.meta["sector"] = int(s)
                ffi_list.append(lc)
                print(f"  FFI S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
            except Exception as e:
                print(f"  FFI entry {i}: failed -> {e}")

    if not (pdcsap_list or ffi_list):
        raise RuntimeError("No light curves obtained from SPOC or TESSCut.")

    # -------------------------
    # 3) Stitch, flatten, QC
    # -------------------------
    all_lcs_raw   = pdcsap_list + ffi_list
    stitched_raw  = LightCurveCollection(all_lcs_raw).stitch().remove_nans()
    stitched_flat = custom_flatten(stitched_raw, window_days=win_days)

    def rms_ppm(x): return float(np.nanstd(x)*1e6)
    qc = {
        "target_toi": target_toi,
        "target_tic": target_tic,
        "n_sectors_pdcsap": len(pdcsap_list),
        "n_sectors_ffi": len(ffi_list),
        "sectors_pdcsap": [int(lc.meta.get("sector", -1)) for lc in pdcsap_list],
        "sectors_ffi": [int(lc.meta.get("sector", -1)) for lc in ffi_list],
        "n_points_raw": int(len(stitched_raw)),
        "n_points_flat": int(len(stitched_flat)),
        "rms_raw_ppm": rms_ppm(stitched_raw.flux.value),
        "rms_flat_ppm": rms_ppm(stitched_flat.flux.value),
        "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
        "flatten_window_days": float(win_days),
        "notes": "PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing."
    }

    # -------------------------
    # 4) Figure & JSON
    # -------------------------
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,6), dpi=140, sharex=True)
    stitched_raw.plot(ax=ax1, marker=".", lw=0, alpha=0.35)
    ax1.set_ylabel("Flux (norm)")
    ax1.set_title(f"{target_toi} (TIC {target_tic}) — Stitched RAW")
    stitched_flat.plot(ax=ax2, marker=".", lw=0, alpha=0.45, label="flattened")
    ax2.set_xlabel("Time [BTJD]")
    ax2.set_ylabel("Flux (norm)")
    ax2.set_title(f"{target_toi} — Stitched FLAT (window={win_days:.2f} d)")
    ax2.legend(loc="best")
    fig.tight_layout()
    fig.savefig(out_fig); plt.close(fig)

    with open(out_json, "w") as f:
        json.dump(qc, f, indent=2)

    print("\n== DONE ==")
    print(f"Saved figure: {out_fig}")
    print(f"Saved JSON  : {out_json}")
    print("Summary:", qc)

    return qc, out_fig, out_json

# ---------- Run on Target A (deliverable) ----------
_ = run_download_clean(TARGET_TOI, TARGET_TIC, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)

# ---------- OPTIONAL: batch-run first 4 priority targets ----------
# Set RUN_BATCH=True to run on the first 4 rows in results/priority_targets.csv
RUN_BATCH = False
if RUN_BATCH:
    import pandas as pd
    dfp = pd.read_csv("results/priority_targets.csv")
    # Expect columns: TIC_ID_norm, toi (string/number). Adjust if your headers differ.
    for _, row in dfp.head(4).iterrows():
        tic = int(row["TIC_ID_norm"])
        toi = f"TOI {row['toi']}"
        try:
            run_download_clean(toi, tic, quality=QUALITY, cutout_sz=CUTOUT_SZ, win_days=WIN_DAYS)
        except Exception as e:
            print(f"[Batch] {toi} (TIC {tic}) failed: {e}")

Target: TOI 139.01 (TIC 62483237)
 PDCSAP S1: ok (N=18094)
 PDCSAP S28: ok (N=77241)
 PDCSAP S28: ok (N=12584)
 PDCSAP S68: ok (N=87527)
 PDCSAP S68: ok (N=14710)
  FFI S1: skip (already have PDCSAP)
  FFI S28: skip (already have PDCSAP)
  FFI S68: skip (already have PDCSAP)
  FFI S95: ok (N=10114)

== DONE ==
Saved figure: figures/TIC62483237_download_clean.png
Saved JSON  : results/TIC62483237_download_clean_summary.json
Summary: {'target_toi': 'TOI 139.01', 'target_tic': 62483237, 'n_sectors_pdcsap': 5, 'n_sectors_ffi': 1, 'sectors_pdcsap': [1, 28, 28, 68, 68], 'sectors_ffi': [95], 'n_points_raw': 220270, 'n_points_flat': 220270, 'rms_raw_ppm': 368080.16896247864, 'rms_flat_ppm': 167808.4942875445, 'cdpp1h_flat_ppm': 84620.20798996135, 'flatten_window_days': 1.0, 'notes': 'PDCSAP preferred; FFI(TESSCut) used where SPOC PDCSAP missing.'}


In [7]:
# 02_download_clean — PDCSAP-first, TESSCut-FFI fallback (ONE-CELL VERSION)
# “PDCSAP first; FFI fallback; stitched + flattened; saves figure + QC.”
# ---- EDIT THESE TWO LINES ONLY ----
TARGET_TOI = "TOI 1801.01"   # label for plots
TARGET_TIC = 119584412       # integer TIC ID
# -----------------------------------

import json, warnings, pathlib, re
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# --- Config ---
QUALITY    = 175         # standard TESS quality mask (matches Lightkurve’s default)
CUTOUT_SZ  = 15          # TESSCut postage-stamp size (pixels)
WIN_DAYS   = 1.0         # flattening window (days) – gentle, won’t erase hour-long dips
OUT_FIG    = f"figures/TIC{TARGET_TIC}_download_clean.png"
OUT_JSON   = f"results/TIC{TARGET_TIC}_download_clean_summary.json"
TARGET     = f"TIC {TARGET_TIC}"

# --- Make folders ---
for d in ["figures", "results", "data_raw_fresh"]:
    pathlib.Path(d).mkdir(exist_ok=True)

# --- Helpers ---
def window_len_for(t_days, window_days=1.0):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt<=0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len//2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def custom_flatten(lc, window_days=1.0, polyorder=2):
    """Return a NEW LightCurve with same time, flattened flux (median=1)."""
    from lightkurve import LightCurve
    t = lc.time.value
    f = lc.flux.value.astype(float)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f/np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    return LightCurve(time=lc.time, flux=flat, flux_err=getattr(lc, "flux_err", None), meta=lc.meta)

def approx_cdpp_ppm(lc, hours=1.0):
    t = lc.time.value; f = lc.flux.value
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr<=0: return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def get_header_sector(header):
    for k in ("SECTOR","sector","SEC1"):
        try:
            v = header.get(k)
            if v is not None:
                return int(v)
        except Exception:
            pass
    txt = " ".join([f"{k}={header.get(k)}" for k in header.keys()])
    m = re.search(r"[Ss]ector[^0-9]*([0-9]{1,3})", txt)
    return int(m.group(1)) if m else None

def robust_ylim(flux, lo=0.2, hi=99.8):
    lo_, hi_ = np.nanpercentile(flux, [lo, hi])
    pad = 0.03*(hi_ - lo_)
    return lo_-pad, hi_+pad

def apply_quality_mask(lc, bitmask=QUALITY):
    """Keep cadences where (quality & ~bitmask)==0, i.e., standard good cadences."""
    q = getattr(lc, "quality", None)
    if q is None:
        return lc
    good = (q & ~bitmask) == 0
    return lc[good]

# --- Lightkurve imports ---
import lightkurve as lk
from lightkurve import LightCurveCollection

print(f"Target: {TARGET_TOI} ({TARGET})")

# =========================
# 1) PDCSAP (SPOC) download
# =========================
pdcsap_list, got_spoc_sectors = [], set()
sr_spoc = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

if len(sr_spoc) == 0:
    print("No SPOC PDCSAP products found.")
else:
    for i in range(len(sr_spoc)):
        try:
            lcf = sr_spoc[i].download(download_dir="data_raw_fresh")
            lc  = lcf.PDCSAP_FLUX.remove_nans().normalize()
            lc  = apply_quality_mask(lc, QUALITY)

            # sector from object or header
            s = getattr(lcf, "sector", None)
            if s is None:
                hdr = None
                try:
                    hdr = lcf.header() if hasattr(lcf, "header") else lcf.get_header()
                except Exception:
                    pass
                if hdr is not None:
                    s = get_header_sector(hdr)
            if s is None: s = -1

            lc.meta["source"] = "PDCSAP"; lc.meta["sector"] = int(s)
            pdcsap_list.append(lc)
            if s != -1: got_spoc_sectors.add(int(s))
            print(f" PDCSAP S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
        except Exception as e:
            print(f" PDCSAP entry {i}: failed -> {e}")

# ==============================
# 2) TESSCut (FFI) for gaps only
# ==============================
ffi_list = []
try:
    sr_cut = lk.search_tesscut(TARGET)
except Exception as e:
    sr_cut = []
    print("TESSCut query failed:", e)

if len(sr_cut) == 0:
    print("No TESSCut entries found.")
else:
    for i in range(len(sr_cut)):
        try:
            tpf = sr_cut[i].download(cutout_size=CUTOUT_SZ, download_dir="data_raw_fresh")
            s = getattr(tpf, "sector", None)
            if s is None:
                hdr = None
                try:
                    hdr = tpf.hdu[0].header
                except Exception:
                    pass
                if hdr is not None:
                    s = get_header_sector(hdr)
            if s is None: s = -1

            if (s != -1) and (int(s) in got_spoc_sectors):
                print(f"  FFI S{int(s)}: skip (already have PDCSAP)")
                continue

            mask = tpf.create_threshold_mask(threshold=3, reference_pixel=None)
            if not np.any(mask):
                m = np.zeros_like(mask, dtype=bool)
                yy, xx = np.unravel_index(np.nanargmax(np.nanmedian(tpf.flux.value, axis=0)), mask.shape)
                m[yy, xx] = True
                mask = m

            lc = tpf.to_lightcurve(aperture_mask=mask).remove_nans().normalize()
            lc = apply_quality_mask(lc, QUALITY)
            lc.meta["source"] = "FFI"; lc.meta["sector"] = int(s)
            ffi_list.append(lc)
            print(f"  FFI S{int(s) if s!=-1 else '?'}: ok (N={len(lc)})")
        except Exception as e:
            print(f"  FFI entry {i}: failed -> {e}")

if not (pdcsap_list or ffi_list):
    raise RuntimeError("No light curves obtained from SPOC or TESSCut.")

# =======================
# 3) Stitch, clean, QC
# =======================
all_lcs_raw   = pdcsap_list + ffi_list
stitched_raw  = LightCurveCollection(all_lcs_raw).stitch().remove_nans()

# very light outlier clip (protects plots; safe for downloader stage)
f = stitched_raw.flux.value
med, sig = np.nanmedian(f), np.nanstd(f)
m = (f > med - 5*sig) & (f < med + 5*sig)
stitched_raw = stitched_raw[m]

stitched_flat = custom_flatten(stitched_raw, window_days=WIN_DAYS)

def rms_ppm(x): return float(np.nanstd(x)*1e6)
qc = {
    "target_toi": TARGET_TOI,
    "target_tic": int(TARGET_TIC),
    "n_sectors_pdcsap": len(pdcsap_list),
    "n_sectors_ffi": len(ffi_list),
    "sectors_pdcsap": [int(lc.meta.get("sector", -1)) for lc in pdcsap_list],
    "sectors_ffi": [int(lc.meta.get("sector", -1)) for lc in ffi_list],
    "n_points_raw": int(len(stitched_raw)),
    "n_points_flat": int(len(stitched_flat)),
    "rms_raw_ppm": rms_ppm(stitched_raw.flux.value),
    "rms_flat_ppm": rms_ppm(stitched_flat.flux.value),
    "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
    "flatten_window_days": WIN_DAYS,
    "notes": "PDCSAP preferred; FFI(TESSCut) only when SPOC PDCSAP is missing."
}

# =======================
# 4) Figure + JSON output
# =======================
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,6), dpi=140, sharex=True)
t0 = 2457000.0

ax1.scatter(stitched_raw.time.value, stitched_raw.flux.value, s=2, alpha=0.35)
ax1.set_ylabel("Flux (norm)")
ax1.set_title(f"{TARGET_TOI} ({TARGET}) — Stitched RAW")

ax2.scatter(stitched_flat.time.value, stitched_flat.flux.value, s=2, alpha=0.45, label="flattened")
ax2.set_xlabel("Time [BTJD]")
ax2.set_ylabel("Flux (norm)")
ax2.set_title(f"{TARGET_TOI} — Stitched FLAT (window={WIN_DAYS:.2f} d)")
ax2.legend(loc="best")

# keep y-axes sensible even if an FFI chunk is wild
y1_lo, y1_hi = robust_ylim(stitched_raw.flux.value)
y2_lo, y2_hi = robust_ylim(stitched_flat.flux.value)
ax1.set_ylim(y1_lo, y1_hi)
ax2.set_ylim(y2_lo, y2_hi)

fig.tight_layout()
fig.savefig(OUT_FIG); plt.close(fig)

with open(OUT_JSON, "w") as f:
    json.dump(qc, f, indent=2)

print("\n== DONE ==")
print(f"Saved figure: {OUT_FIG}")
print(f"Saved JSON  : {OUT_JSON}")
print("Summary:", qc)

Target: TOI 1801.01 (TIC 119584412)
 PDCSAP S22: ok (N=16101)
 PDCSAP S49: ok (N=13272)
  FFI S22: skip (already have PDCSAP)
  FFI S49: skip (already have PDCSAP)

== DONE ==
Saved figure: figures/TIC119584412_download_clean.png
Saved JSON  : results/TIC119584412_download_clean_summary.json
Summary: {'target_toi': 'TOI 1801.01', 'target_tic': 119584412, 'n_sectors_pdcsap': 2, 'n_sectors_ffi': 0, 'sectors_pdcsap': [22, 49], 'sectors_ffi': [], 'n_points_raw': 29370, 'n_points_flat': 29370, 'rms_raw_ppm': 1356.3033426180482, 'rms_flat_ppm': 1096.2121625978075, 'cdpp1h_flat_ppm': 251.0249996146523, 'flatten_window_days': 1.0, 'notes': 'PDCSAP preferred; FFI(TESSCut) only when SPOC PDCSAP is missing.'}


In [10]:
# === Per-sector QA (robust: handles Time/Quantity types) ===
import numpy as np, matplotlib.pyplot as plt, pathlib, json
import lightkurve as lk

TIC = TARGET_TIC
BASE = f"TIC{TIC}"
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("results").mkdir(exist_ok=True)

def as_float_array(x):
    """Return a float numpy array from Lightkurve/astropy containers."""
    # Try .value (Quantity/Time), else assume it's already an array-like
    try:
        x = x.value
    except Exception:
        pass
    return np.asarray(x, dtype=float)

def rms_ppm(y):
    y = as_float_array(y)
    return float(np.nanstd(y) * 1e6)

def cdpp_ppm(time, flux, hours=1.0):
    t = as_float_array(time)
    f = as_float_array(flux)
    dt = np.nanmedian(np.diff(t))
    if not np.isfinite(dt) or dt <= 0:
        return float("nan")
    n = max(1, int(round((hours/24.0)/dt)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run)*1e6)

def build_quality_mask(lc, quality_bitmask):
    q = getattr(lc, "quality", None)
    if q is not None and len(q) == len(lc.flux):
        try:
            return lk.utils.TessQualityFlags.create_quality_mask(q, bitmask=quality_bitmask)
        except Exception:
            return np.ones(len(lc.flux), dtype=bool)
    return np.ones(len(lc.flux), dtype=bool)

entries = []
for src_list in (pdcsap_list, ffi_list):
    for lc in src_list:
        # sector & source tags
        try:
            sector = int(lc.meta.get("sector", -1))
        except Exception:
            sector = -1
        src_lbl = "PDCSAP" if lc.meta.get("source") == "PDCSAP" else "FFI"

        # arrays
        t_all = as_float_array(lc.time)
        f_all = as_float_array(lc.flux)

        # masks
        qmask  = build_quality_mask(lc, QUALITY)
        finite = np.isfinite(t_all) & np.isfinite(f_all)
        m = qmask & finite

        N_all = int(len(f_all))
        N_use = int(np.count_nonzero(m))
        frac_bad = float(1 - (N_use / max(N_all, 1)))

        # plot with plain arrays
        fig, ax = plt.subplots(1, 1, figsize=(11, 3), dpi=140)
        ax.plot(t_all[m], f_all[m], ".", ms=2, alpha=0.7)
        ax.set_title(f"{TARGET_TOI} S{sector if sector!=-1 else '?'} — {src_lbl}")
        ax.set_xlabel("Time [BTJD]"); ax.set_ylabel("Flux (norm)")
        out_png = f"figures/QA_{BASE}_S{sector if sector!=-1 else 'u'}.png"
        fig.tight_layout(); fig.savefig(out_png); plt.close(fig)

        # metrics
        entries.append({
            "sector": sector,
            "source": src_lbl,
            "N_all": N_all,
            "N_use": N_use,
            "frac_bad": round(frac_bad, 4),
            "rms_ppm": rms_ppm(f_all[m]),
            "cdpp1h_ppm": cdpp_ppm(t_all[m], f_all[m]),
        })

if not entries:
    raise RuntimeError("No sectors could be processed for QA (empty entries).")

# sort & save summary
entries = sorted(entries, key=lambda d: (d["sector"], d["source"]))
out_json = f"results/QA_{BASE}_summary.json"
with open(out_json, "w") as f:
    json.dump(entries, f, indent=2)

print(f"Per-sector QA complete: saved {len(entries)} plots to figures/ and summary to {out_json}")
for e in entries:
    cdpp_int = int(e['cdpp1h_ppm']) if np.isfinite(e['cdpp1h_ppm']) else 'nan'
    print(f" S{e['sector']}: {e['source']}, N_use={e['N_use']}/{e['N_all']}, "
          f"frac_bad={e['frac_bad']}, cdpp1h_ppm={cdpp_int}")

Per-sector QA complete: saved 2 plots to figures/ and summary to results/QA_TIC119584412_summary.json
 S22: PDCSAP, N_use=16101/16101, frac_bad=0.0, cdpp1h_ppm=740
 S49: PDCSAP, N_use=13272/13272, frac_bad=0.0, cdpp1h_ppm=937


In [16]:
# ==== Formalize detrend params & save pre/post metrics (Target A) ====
import json, pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightkurve as lk
from lightkurve import LightCurveCollection

# robust import for quality flags across LK versions
try:
    from lightkurve.utils import TessQualityFlags
except Exception:
    TessQualityFlags = getattr(lk, "TessQualityFlags", None)
assert TessQualityFlags is not None, "Could not import TessQualityFlags from lightkurve."

# Reuse target if already defined; else set Target A
try:
    TARGET_TOI, TARGET_TIC, TARGET
except NameError:
    TARGET_TOI = "TOI 1801.01"
    TARGET_TIC = 119584412
    TARGET     = f"TIC {TARGET_TIC}"

# ---- detrend knobs we are "locking" today ----
DETRENDS = {
    "window_days": 1.0,      # gentle, won't eat shallow dips
    "polyorder": 2,          # quadratic local trend
    "use_transit_mask": False,
    "mask_width_factor": 1.7
}
QUALITY = 175  # TESS bitmask

# ---- helpers ----
def to_vec(x):
    """Return a float array from Lightkurve/astropy/Masked inputs; fill masked with NaN."""
    # Prefer .value if present (astropy Time/Quantity), else the object itself
    base = getattr(x, "value", x)
    arr = np.array(base)
    # If still an object array (e.g., astropy Time), try .astype(float)
    try:
        arr = arr.astype(float)
    except Exception:
        # final fallback: per-element conversion (slow but robust)
        arr = np.array([float(v) if np.isfinite(v) else np.nan for v in np.array(base, dtype=object)], dtype=float)
    # If masked, fill with nan
    if np.ma.isMaskedArray(arr):
        arr = arr.filled(np.nan)
    return arr

def window_len_for(t_days, window_days):
    dt = np.nanmedian(np.diff(t_days))
    wl = 401 if (not np.isfinite(dt) or dt <= 0) else max(5, int(round(window_days/dt)))
    return wl if wl % 2 == 1 else wl + 1

def savgol_trend(t, y, window_len, polyorder=2):
    half = window_len // 2
    trend = np.full_like(y, np.nan, dtype=float)
    for i in range(len(y)):
        lo, hi = max(0, i-half), min(len(y), i+half+1)
        tt = t[lo:hi] - t[i]; yy = y[lo:hi]
        m = np.isfinite(tt) & np.isfinite(yy)
        if np.count_nonzero(m) >= polyorder+1:
            c = np.polyfit(tt[m], yy[m], polyorder)
            trend[i] = np.polyval(c, 0.0)
    if not np.isfinite(trend).any():
        trend[:] = np.nanmedian(y)
    trend[~np.isfinite(trend)] = np.nanmedian(y)
    return trend

def flatten_simple(lc, window_days, polyorder):
    t = to_vec(lc.time)
    f = to_vec(lc.flux)
    wl = window_len_for(t, window_days)
    tr = savgol_trend(t, f, wl, polyorder=polyorder)
    flat = f / np.where(np.isfinite(tr), tr, np.nanmedian(f))
    flat /= np.nanmedian(flat)
    out = lc.copy()
    out.flux = flat
    return out

def approx_cdpp_ppm(lc, hours=1.0):
    t = to_vec(lc.time); f = to_vec(lc.flux)
    dt_hr = np.nanmedian(np.diff(t))*24.0
    if not np.isfinite(dt_hr) or dt_hr <= 0:
        return float("nan")
    n = max(1, int(round(hours/dt_hr)))
    run = np.convolve(f - np.nanmedian(f), np.ones(n)/n, mode="valid")
    return float(np.nanstd(run) * 1e6)

# ---- ensure folders ----
pathlib.Path("results").mkdir(exist_ok=True)
pathlib.Path("figures").mkdir(exist_ok=True)
pathlib.Path("data_raw_fresh").mkdir(exist_ok=True)

print(f"Detrend formalization on: {TARGET_TOI} (TIC {TARGET_TIC})")

# ---- fetch PDCSAP for A (PDCSAP-first) ----
sr = lk.search_lightcurvefile(TARGET, mission="TESS", author="SPOC")

rows = []         # ONLY numbers/strings for CSV
raw_list = []     # LightCurve objects (raw/normalized)
flat_list = []    # LightCurve objects (flattened)

for i in range(len(sr)):
    try:
        lcf = sr[i].download(download_dir="data_raw_fresh")
        sector = int(getattr(lcf, "sector", -1) or -1)

        lc0 = lcf.PDCSAP_FLUX
        qmask = TessQualityFlags.create_quality_mask(lc0.quality, bitmask=QUALITY)
        lc = lc0[qmask].remove_nans().normalize()
        print(f"  PDCSAP S{sector}: N={len(lc)}")

        lc_flat = flatten_simple(lc, DETRENDS["window_days"], DETRENDS["polyorder"])

        rows.append({
            "tic": TARGET_TIC,
            "toi": TARGET_TOI,
            "sector": sector,
            "source": "PDCSAP",
            "n_raw": int(len(lc)),
            "n_flat": int(len(lc_flat)),
            "rms_raw_ppm": float(np.nanstd(to_vec(lc.flux))*1e6),
            "rms_flat_ppm": float(np.nanstd(to_vec(lc_flat.flux))*1e6),
            "cdpp1h_raw_ppm": approx_cdpp_ppm(lc, hours=1.0),
            "cdpp1h_flat_ppm": approx_cdpp_ppm(lc_flat, hours=1.0),
            "window_days": DETRENDS["window_days"],
            "polyorder": DETRENDS["polyorder"],
            "use_transit_mask": DETRENDS["use_transit_mask"],
            "mask_width_factor": DETRENDS["mask_width_factor"],
            "quality_bitmask": QUALITY,
        })
        raw_list.append(lc)
        flat_list.append(lc_flat)
    except Exception as e:
        print(f"  PDCSAP entry {i}: failed -> {e}")

if not raw_list:
    raise RuntimeError("No PDCSAP sectors processed; aborting.")

# ---- stitched summary row ----
stitched_raw  = LightCurveCollection(raw_list).stitch().remove_nans().normalize()
stitched_flat = LightCurveCollection(flat_list).stitch().remove_nans().normalize()

rows.append({
    "tic": TARGET_TIC,
    "toi": TARGET_TOI,
    "sector": -1,  # -1 = stitched
    "source": "STITCHED",
    "n_raw": int(len(stitched_raw)),
    "n_flat": int(len(stitched_flat)),
    "rms_raw_ppm": float(np.nanstd(to_vec(stitched_raw.flux))*1e6),
    "rms_flat_ppm": float(np.nanstd(to_vec(stitched_flat.flux))*1e6),
    "cdpp1h_raw_ppm": approx_cdpp_ppm(stitched_raw, hours=1.0),
    "cdpp1h_flat_ppm": approx_cdpp_ppm(stitched_flat, hours=1.0),
    "window_days": DETRENDS["window_days"],
    "polyorder": DETRENDS["polyorder"],
    "use_transit_mask": DETRENDS["use_transit_mask"],
    "mask_width_factor": DETRENDS["mask_width_factor"],
    "quality_bitmask": QUALITY,
})

# ---- save metrics & params ----
metrics_csv = f"results/TIC{TARGET_TIC}_detrend_metrics.csv"
pd.DataFrame(rows).sort_values(["source","sector"]).to_csv(metrics_csv, index=False)

params_json = f"results/TIC{TARGET_TIC}_detrend_params.json"
with open(params_json, "w") as f:
    json.dump({
        "target_toi": TARGET_TOI,
        "target_tic": TARGET_TIC,
        "detrend_params": DETRENDS,
        "quality_bitmask": QUALITY,
        "created_by": "02_download_clean.ipynb",
    }, f, indent=2)

# ---- quick before/after plot ----
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), dpi=140, sharex=True)
stitched_raw.plot(ax=ax1, marker=".", lw=0, alpha=0.35)
ax1.set_ylabel("Flux (norm)")
ax1.set_title(f"{TARGET_TOI} — Stitched RAW (PDCSAP)")

stitched_flat.plot(ax=ax2, marker=".", lw=0, alpha=0.45)
ax2.set_xlabel("Time [BTJD]")
ax2.set_ylabel("Flux (norm)")
ax2.set_title(f"{TARGET_TOI} — Stitched FLAT (window={DETRENDS['window_days']} d, poly={DETRENDS['polyorder']})")
fig.tight_layout()
fig_path = f"figures/TIC{TARGET_TIC}_detrend_before_after.png"
fig.savefig(fig_path); plt.close(fig)

print("\nDetrend formalization complete.")
print("Saved:", metrics_csv)
print("Saved:", params_json)
print("Saved:", fig_path)

Detrend formalization on: TOI 1801.01 (TIC 119584412)
  PDCSAP S22: N=16102
  PDCSAP S49: N=13272

Detrend formalization complete.
Saved: results/TIC119584412_detrend_metrics.csv
Saved: results/TIC119584412_detrend_params.json
Saved: figures/TIC119584412_detrend_before_after.png


In [7]:
# =============================== #
# FAST BLS→TLS (Target A)
# Per-sector and stitched:
# - BLS over a wide range (fast)
# - TLS only near top BLS peaks (narrow windows, multi-threaded)
# Saves: periodograms, top-3 CSVs, folded plots.
# =============================== #

import os, csv, time, json, warnings
import numpy as np
import matplotlib.pyplot as plt

import lightkurve as lk
from astropy.timeseries import BoxLeastSquares
from transitleastsquares import transitleastsquares

# ---- Silence Lightkurve warnings (version-safe) ----
try:
    from lightkurve.utils import LightkurveWarning
except Exception:
    class LightkurveWarning(Warning): pass
warnings.filterwarnings("ignore", category=LightkurveWarning)

# ---- Config ----
TARGET_TOI = "TOI 1801.01"
TARGET_TIC = 119584412
SECTORS    = [22, 49]                  # edit if needed

# BLS search window (days)
BLS_PERIOD_MIN = 0.5
BLS_PERIOD_MAX = 50.0
BLS_NPER       = 5000                  # keep reasonable; BLS is fast

# Durations to test in hours (BLS)
BLS_DURATIONS_HR = np.linspace(0.5, 3.0, 18)

# TLS narrow window around each BLS peak
TLS_WINDOW_FRAC  = 0.01                # ±1% around each BLS peak
TLS_THREADS      = max(1, (os.cpu_count() or 1))   # use all cores
TLS_MIN_TRANSITS = 2                   # require ≥ 2 transits (faster/stricter)

FIGDIR = "figures"; RESDIR = "results"
os.makedirs(FIGDIR, exist_ok=True)
os.makedirs(RESDIR, exist_ok=True)

# If you know star params (from TIC): helps TLS grid a bit
R_STAR = 0.55   # R_sun
M_STAR = 0.55   # M_sun

# ---- Helpers ----
def load_pdcsap_sector(tic, sector):
    """Prefer modern search_lightcurve(..., author='SPOC'); fallback to search_lightcurvefile()."""
    try:
        sr = lk.search_lightcurve(f"TIC {tic}", mission="TESS", sector=sector, author="SPOC")
        if len(sr) == 0:
            raise RuntimeError("No SPOC PDCSAP LC via search_lightcurve")
        lc = sr.download().remove_nans()
        return lc
    except Exception:
        sr2 = lk.search_lightcurvefile(f"TIC {tic}", mission="TESS", sector=sector)
        lcf = sr2.download()
        lc = lcf.PDCSAP_FLUX.remove_nans()
        return lc

def lc_to_arrays(lc):
    """Return (t,f) float arrays, normalized by median; robust to masked arrays/NaNs."""
    t = getattr(lc.time, "value", lc.time)
    f = getattr(lc.flux, "value", lc.flux)
    t = np.asarray(t, dtype=float)
    f = np.asarray(f, dtype=float)
    if np.ma.isMaskedArray(t): t = t.filled(np.nan)
    if np.ma.isMaskedArray(f): f = f.filled(np.nan)
    m = np.isfinite(t) & np.isfinite(f)
    f_med = np.nanmedian(f[m]) if np.any(m) else 1.0
    if not np.isfinite(f_med) or f_med == 0: f_med = 1.0
    return t[m], (f[m]/f_med)

def unique_peaks(periods, power, k=3, tol_frac=0.01):
    """Pick top-k unique periods (avoid near-duplicates within tol_frac)."""
    idx = np.argsort(power)[::-1]
    picks = []
    for i in idx:
        p = periods[i]
        if all(abs(p - q)/q > tol_frac for q in picks):
            picks.append(p)
        if len(picks) == k:
            break
    return picks

def plot_periodogram(x, y, xlabel, title, outpng):
    plt.figure(figsize=(8,4), dpi=140)
    plt.plot(x, y, lw=1)
    plt.xlabel(xlabel); plt.ylabel("Power"); plt.title(title)
    plt.tight_layout(); plt.savefig(outpng); plt.close()

def fold_and_plot(t, f, period, t0, title, outpng, nbins=200):
    phase = ((t - t0 + 0.5*period) % period) / period - 0.5
    order = np.argsort(phase); phase, f = phase[order], f[order]
    bins = np.linspace(-0.5, 0.5, nbins+1)
    which = np.digitize(phase, bins) - 1
    yb = np.array([np.nanmean(f[which==i]) if np.any(which==i) else np.nan for i in range(nbins)])
    xb = 0.5*(bins[:-1]+bins[1:])
    plt.figure(figsize=(8,4), dpi=140)
    plt.plot(phase, f, ".", ms=2, alpha=0.35)
    plt.plot(xb, yb, "-", lw=1.5)
    plt.axvline(0.0, color="k", lw=1, alpha=0.3)
    plt.xlabel("Phase (cycles)"); plt.ylabel("Relative flux"); plt.title(title)
    plt.tight_layout(); plt.savefig(outpng); plt.close()

def append_csv(path, rows, header=None):
    new = not os.path.exists(path)
    with open(path, "a", newline="") as f:
        w = csv.writer(f)
        if new and header: w.writerow(header)
        for r in rows: w.writerow(r)

def bls_power_safe(t, f, periods, durations):
    """Run BLS, trying objective='snr' first; if not supported, fall back."""
    bls = BoxLeastSquares(t, f)
    try:
        res = bls.power(periods, durations, objective="snr")
    except TypeError:
        res = bls.power(periods, durations)
    return res

def tls_narrow(t, f, p_center, frac=TLS_WINDOW_FRAC, nthreads=TLS_THREADS, nmin=TLS_MIN_TRANSITS):
    """
    TLS around a single candidate period (±frac). Returns (period, SDE, T0, res).
    NOTE: R_star and M_star must be passed to .power(...), not the constructor.
    """
    tls = transitleastsquares(t, f)
    pmin = p_center*(1-frac)
    pmax = p_center*(1+frac)
    if not np.isfinite(pmin) or not np.isfinite(pmax) or pmin <= 0 or pmax <= pmin:
        pmin, pmax = max(0.5, p_center*0.98), p_center*1.02

    # Try with all threads; if TLS complains, fall back to 1 thread.
    try:
        res = tls.power(
            period_min=pmin, period_max=pmax,
            show_progress_bar=True,
            use_threads=int(nthreads),
            n_transits_min=int(nmin),
            R_star=R_STAR, M_star=M_STAR
        )
    except ValueError as e:
        if "use_threads" in str(e):
            res = tls.power(
                period_min=pmin, period_max=pmax,
                show_progress_bar=True,
                use_threads=1,
                n_transits_min=int(nmin),
                R_star=R_STAR, M_star=M_STAR
            )
        else:
            # If window was too tight for a grid, widen it a bit
            res = tls.power(
                period_min=p_center*(1-2*frac), period_max=p_center*(1+2*frac),
                show_progress_bar=True,
                use_threads=int(nthreads),
                n_transits_min=int(nmin),
                R_star=R_STAR, M_star=M_STAR
            )

    return float(res.period), float(res.SDE), float(res.T0), res

def run_block(label, t, f):
    """Run BLS wide (clamped to data span), then TLS narrow for top-3; save artifacts."""
    print(f"\n[{label}] points={t.size}  threads={TLS_THREADS}")

    # ---- BLS (wide, but clamp to data span for per-sector speed) ----
    t0 = time.time()
    span = float(np.nanmax(t) - np.nanmin(t))
    bls_pmax = min(BLS_PERIOD_MAX, max(BLS_PERIOD_MIN*1.2, 0.90*span))
    periods   = np.linspace(BLS_PERIOD_MIN, bls_pmax, BLS_NPER)
    durations = BLS_DURATIONS_HR / 24.0
    bls_res   = bls_power_safe(t, f, periods, durations)
    print(f"[{label}] BLS done in {time.time()-t0:.1f}s (Pmax used={bls_pmax:.2f} d)")

    plot_periodogram(
        bls_res.period, bls_res.power,
        "Period (days)", f"{TARGET_TOI} ({label}) — BLS periodogram",
        f"{FIGDIR}/TIC{TARGET_TIC}_{label}_BLS_periodogram.png"
    )

    bls_topP = unique_peaks(bls_res.period, bls_res.power, k=3, tol_frac=0.01)
    append_csv(
        f"{RESDIR}/TIC{TARGET_TIC}_{label}_BLS_top3.csv",
        [[TARGET_TIC, TARGET_TOI, label, float(p),
          float(bls_res.power[np.argmin(abs(bls_res.period-p))])] for p in bls_topP],
        header=["tic","toi","label","period_days","power"]
    )

    # ---- TLS (narrow around each BLS peak) ----
    tls_rows = []
    for p in bls_topP:
        print(f"[{label}] TLS refine around {p:.5f} d (±{TLS_WINDOW_FRAC*100:.1f}%) …")
        t1 = time.time()
        p_best, sde, t0_best, res = tls_narrow(t, f, p)
        print(f"[{label}]   TLS best P={p_best:.6f} d, SDE={sde:.2f} (took {time.time()-t1:.1f}s)")
        tls_rows.append([TARGET_TIC, TARGET_TOI, label, p_best, sde, t0_best])

        # Save TLS periodogram and fold for this candidate
        plot_periodogram(
            res.periods, res.power,
            "Period (days)", f"{TARGET_TOI} ({label}) — TLS @ {p:.5f}±{TLS_WINDOW_FRAC*100:.1f}%",
            f"{FIGDIR}/TIC{TARGET_TIC}_{label}_TLS_periodogram_around_{p:.5f}.png"
        )
        fold_and_plot(
            t, f, p_best, t0_best,
            f"{TARGET_TOI} ({label}) — TLS fold @ P={p_best:.5f} d",
            f"{FIGDIR}/TIC{TARGET_TIC}_{label}_TLS_fold_P{p_best:.5f}.png"
        )

    append_csv(
        f"{RESDIR}/TIC{TARGET_TIC}_{label}_TLS_top3.csv",
        tls_rows, header=["tic","toi","label","period_days","SDE","T0_BTJD"]
    )

# =============================== #
# Run per-sector
# =============================== #
t_all_list, f_all_list = [], []
for s in SECTORS:
    lc = load_pdcsap_sector(TARGET_TIC, s).normalize()
    t, f = lc_to_arrays(lc)
    print(f"{TARGET_TOI} — S{s}: N={t.size}")
    run_block(f"S{s}", t, f)
    t_all_list.append(t); f_all_list.append(f)

# =============================== #
# Run stitched (combined)
# =============================== #
t_all = np.concatenate(t_all_list); f_all = np.concatenate(f_all_list)
order = np.argsort(t_all); t_all, f_all = t_all[order], f_all[order]
run_block("stitched", t_all, f_all)

print("\nDone: fast BLS→TLS complete (per-sector + stitched). Artifacts saved in figures/ and results/.")

TOI 1801.01 — S22: N=16102

[S22] points=16102  threads=8




[S22] BLS done in 2.0s (Pmax used=23.92 d)
[S22] TLS refine around 14.97339 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 38 durations
Searching 16102 data points, 2414 periods from 0.601 to 13.29 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2414/2414 periods | 00:09<00:00


Searching for best T0 for period 13.26054 days
[S22]   TLS best P=13.260536 d, SDE=9.34 (took 10.5s)
[S22] TLS refine around 15.98076 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 38 durations
Searching 16102 data points, 2414 periods from 0.601 to 13.29 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2414/2414 periods | 00:08<00:00


Searching for best T0 for period 13.26054 days
[S22]   TLS best P=13.260536 d, SDE=9.34 (took 9.5s)
[S22] TLS refine around 14.75786 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 38 durations
Searching 16102 data points, 2414 periods from 0.601 to 13.29 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2414/2414 periods | 00:09<00:00


Searching for best T0 for period 13.26054 days
[S22]   TLS best P=13.260536 d, SDE=9.34 (took 10.5s)
TOI 1801.01 — S49: N=13272

[S49] points=13272  threads=8




[S49] BLS done in 1.8s (Pmax used=21.49 d)
[S49] TLS refine around 5.37177 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 37 durations
Searching 13272 data points, 2125 periods from 0.601 to 11.942 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2125/2125 periods | 00:06<00:00


Searching for best T0 for period 10.93615 days
[S49]   TLS best P=10.936152 d, SDE=7.15 (took 7.5s)
[S49] TLS refine around 5.43477 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 37 durations
Searching 13272 data points, 2125 periods from 0.601 to 11.942 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2125/2125 periods | 00:06<00:00


Searching for best T0 for period 10.93615 days
[S49]   TLS best P=10.936152 d, SDE=7.15 (took 7.4s)
[S49] TLS refine around 5.31297 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 37 durations
Searching 13272 data points, 2125 periods from 0.601 to 11.942 days
Using all 8 CPU threads


100%|██████████████████████████████████████████| 2125/2125 periods | 00:06<00:00


Searching for best T0 for period 10.93615 days
[S49]   TLS best P=10.936152 d, SDE=7.15 (took 6.9s)

[stitched] points=29374  threads=8
[stitched] BLS done in 4.0s (Pmax used=50.00 d)
[stitched] TLS refine around 16.02631 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 29 durations
Searching 29374 data points, 359 periods from 15.866 to 16.187 days
Using all 8 CPU threads


100%|████████████████████████████████████████████| 359/359 periods | 00:05<00:00


Searching for best T0 for period 16.02719 days




[stitched]   TLS best P=16.027187 d, SDE=2.30 (took 6.4s)
[stitched] TLS refine around 15.02621 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 29 durations
Searching 29374 data points, 366 periods from 14.876 to 15.176 days
Using all 8 CPU threads


100%|████████████████████████████████████████████| 366/366 periods | 00:04<00:00


Searching for best T0 for period 15.04600 days




[stitched]   TLS best P=15.046000 d, SDE=1.73 (took 5.6s)
[stitched] TLS refine around 15.67974 d (±1.0%) …
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 29 durations
Searching 29374 data points, 361 periods from 15.524 to 15.836 days
Using all 8 CPU threads


100%|████████████████████████████████████████████| 361/361 periods | 00:04<00:00


Searching for best T0 for period 15.80476 days


  snr_pink_per_transit[i] = (1 - mean_flux) / pinknoise


[stitched]   TLS best P=15.804763 d, SDE=1.71 (took 5.6s)

Done: fast BLS→TLS complete (per-sector + stitched). Artifacts saved in figures/ and results/.


In [14]:
import os, warnings
import numpy as np
import lightkurve as lk
from transitleastsquares import transitleastsquares

# ------------ Config ------------
TARGET_TIC = 119584412
TLS_THREADS = max(1, os.cpu_count() or 1)   # use all available cores
TLS_WINDOW_FRAC = 0.01                      # ±1% around a candidate period
TLS_MIN_TRANSITS = 2

# Rough stellar params (OK for TLS grid spacing)
R_STAR = 0.55    # ~Rsun (from your Target A sheet)
M_STAR = 0.55    # ~Msun

# Quiet some noisy warnings
warnings.filterwarnings("ignore", category=lk.LightkurveWarning)

# ------------ Helpers ------------
def load_pdcsap_sector(tic, sector):
    """Download PDCSAP for one sector and return a LightCurve (NaNs removed, normalized)."""
    sr = lk.search_lightcurve(f"TIC {tic}", mission="TESS", sector=sector, exptime=120)
    if len(sr) == 0:
        raise RuntimeError(f"No TESS LC found for TIC {tic} sector {sector}")
    lcf = sr.download(quality_bitmask="hard")
    lc = lcf.PDCSAP_FLUX.remove_nans().normalize()
    return lc

def lc_to_arrays(lc):
    """Convert LightCurve to clean float arrays (t, f), handling masked arrays."""
    t = np.asarray(getattr(lc.time, "value", lc.time), dtype=float)
    f = np.asarray(getattr(lc.flux, "value", lc.flux))
    if np.ma.isMaskedArray(f):
        f = f.filled(np.nan)
    good = np.isfinite(t) & np.isfinite(f)
    return t[good], f[good]

def tls_narrow(t, f, p_center, frac=TLS_WINDOW_FRAC,
               nthreads=TLS_THREADS, nmin=TLS_MIN_TRANSITS):
    """Run TLS in a narrow window around p_center (±frac). Returns best (P, SDE, T0, res)."""
    tls = transitleastsquares(t, f)
    res = tls.power(
        period_min = p_center*(1 - frac),
        period_max = p_center*(1 + frac),
        use_threads = int(max(1, nthreads)),   # TLS requires >=1
        show_progress_bar = False,
        R_star = R_STAR,
        M_star = M_STAR,
        n_transits_min = int(nmin),
    )
    return float(res.period), float(res.SDE), float(res.T0), res

In [15]:
lc = load_pdcsap_sector(TARGET_TIC, 22)  # Sector 22
t, f = lc_to_arrays(lc)

p_best, sde, t0, _ = tls_narrow(t, f, p_center=16.05)  # try around ~16 d
print(f"TLS best: P={p_best:.5f} d | SDE={sde:.2f} | T0={t0:.5f}")

Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 38 durations
Searching 16102 data points, 2414 periods from 0.601 to 13.29 days
Using all 8 CPU threads




Searching for best T0 for period 13.26054 days
TLS best: P=13.26054 d | SDE=9.34 | T0=1910.10076


In [16]:
# S22
lc = load_pdcsap_sector(TARGET_TIC, 22); t22,f22 = lc_to_arrays(lc)
p22, sde22, t0_22, _ = tls_narrow(t22, f22, p_center=16.06, nmin=1)
print(f"S22 (nmin=1): P={p22:.5f} d, SDE={sde22:.2f}")

# S49
lc = load_pdcsap_sector(TARGET_TIC, 49); t49,f49 = lc_to_arrays(lc)
p49, sde49, t0_49, _ = tls_narrow(t49, f49, p_center=16.06, nmin=1)
print(f"S49 (nmin=1): P={p49:.5f} d, SDE={sde49:.2f}")

Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 38 durations
Searching 16102 data points, 2414 periods from 0.601 to 13.29 days
Using all 8 CPU threads




Searching for best T0 for period 13.26054 days
S22 (nmin=1): P=13.26054 d, SDE=9.34
Transit Least Squares TLS 1.32 (5 Apr 2024)
Creating model cache for 37 durations
Searching 13272 data points, 2125 periods from 0.601 to 11.942 days
Using all 8 CPU threads




Searching for best T0 for period 10.93615 days
S49 (nmin=1): P=10.93615 d, SDE=7.15


In [20]:
from pathlib import Path
import csv

def clean_top3_csv(p: Path):
    with p.open() as f:
        rows = list(csv.DictReader(f))
    if not rows:
        print(f"Skipped {p.name}: empty")
        return

    # Pick which metric to sort by
    metric = "power" if "_BLS_" in p.name else "SDE"

    # Keep the best row per unique period (rounded to 5 decimals)
    best_by_period = {}
    for r in rows:
        try:
            per = round(float(r["period_days"]), 5)
            val = float(r[metric])
        except Exception:
            continue
        if per not in best_by_period or val > float(best_by_period[per][metric]):
            best_by_period[per] = r

    # Take the top-3 by metric
    cleaned = sorted(best_by_period.values(),
                     key=lambda r: float(r[metric]),
                     reverse=True)[:3]

    # Overwrite the CSV with the cleaned rows
    with p.open("w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=cleaned[0].keys())
        w.writeheader()
        w.writerows(cleaned)
    print(f"Cleaned {p.name}: kept {len(cleaned)}")

# Brace expansion doesn’t work in pathlib; glob each pattern separately
paths = []
paths += list(Path("results").glob("TIC119584412_*_BLS_top3.csv"))
paths += list(Path("results").glob("TIC119584412_*_TLS_top3.csv"))

if not paths:
    print("No matching files found in results/.")
else:
    for p in sorted(paths):
        clean_top3_csv(p)

Cleaned TIC119584412_S22_BLS_top3.csv: kept 3
Cleaned TIC119584412_S22_TLS_top3.csv: kept 1
Cleaned TIC119584412_S49_BLS_top3.csv: kept 3
Cleaned TIC119584412_S49_TLS_top3.csv: kept 1
Cleaned TIC119584412_stitched_BLS_top3.csv: kept 3
Cleaned TIC119584412_stitched_TLS_top3.csv: kept 3


In [21]:
import csv, os

def append_csv(path, rows, header=None):
    """Overwrite version: replaces any existing file instead of appending."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", newline="") as f:
        w = csv.writer(f)
        if header:
            w.writerow(header)
        for r in rows:
            w.writerow(r)
    print(f"[overwrite] wrote {path} ({len(rows)} rows)")

print("append_csv is now overwrite-mode for this session.")

append_csv is now overwrite-mode for this session.
