In [None]:
"""
Incrementally updates SHARADAR SEP price files for all tickers in the S&P 500 membership matrix.
Loads the Nasdaq Data Link API key, fetches full histories for missing tickers and only new rows
after the latest date for existing files, warns when data is stale beyond MAX_STALE_DAYS, and
persists per-ticker CSVs in ./2-all_prices/sharadar_sep_full.

Use case: run after executing 2b-Bootrap_All_Prices.ipynb to keep SHARADAR SEP data up to date.
"""
import os
import pandas as pd
import numpy as np
import nasdaqdatalink
from datetime import timedelta
from dotenv import load_dotenv
load_dotenv()

# The key is read from the environment (load_dotenv above also pulls in a .env file).
nasdaqdatalink_key = os.getenv("NASDAQ_DATA_LINK_API_KEY")
if nasdaqdatalink_key:
    nasdaqdatalink.ApiConfig.api_key = nasdaqdatalink_key
elif not nasdaqdatalink.ApiConfig.api_key:
    # Never overwrite the client's key with None, and fail here with a clear
    # message rather than on the first API call inside the ticker loop.
    raise RuntimeError(
        "NASDAQ_DATA_LINK_API_KEY is not set; add it to the environment or to a .env file."
    )


# Per-ticker CSVs are read from and updated in place under INPUT_FOLDER;
# OUTPUT_FOLDER currently points at the same directory and is only used to create it.
INPUT_FOLDER  = "./2-all_prices/sharadar_sep_full"
OUTPUT_FOLDER = "./2-all_prices/sharadar_sep_full"

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# The index labels of the membership matrix are used as the ticker universe.
MEMBERSHIP_FILE = "./1-sp500_membership_daily_matrix/sp500_membership_full.parquet"
membership = pd.read_parquet(MEMBERSHIP_FILE)
all_tickers = sorted(membership.index.tolist())

MAX_STALE_DAYS = 90   # warn if no update for > 90 days


def get_incremental_sep(ticker, last_date):
    """
    Fetch SHARADAR/SEP rows for one ticker dated strictly AFTER last_date.

    Parameters:
        ticker:    ticker symbol to query.
        last_date: latest date already stored locally; anything pd.Timestamp
                   accepts (Timestamp, datetime, date, ISO string).

    Returns:
        DataFrame of the new rows with "date" parsed to datetime and sorted
        ascending, or an empty DataFrame when the API returns nothing.

    Raises:
        ValueError: if last_date is missing (None / NaT).
    """
    last_ts = pd.Timestamp(last_date)
    if pd.isna(last_ts):
        raise ValueError(f"last_date for {ticker} is missing (NaT/None)")

    # Send a plain YYYY-MM-DD string instead of relying on how the client
    # stringifies a Timestamp object in the query parameters.
    start_date = (last_ts + timedelta(days=1)).strftime("%Y-%m-%d")

    df_new = nasdaqdatalink.get_table(
        "SHARADAR/SEP",
        ticker=[ticker],
        date={'gte': start_date},
        paginate=True
    )

    if df_new is None or df_new.empty:
        return pd.DataFrame()

    df_new["date"] = pd.to_datetime(df_new["date"])
    df_new = df_new.sort_values("date")

    return df_new


def _write_csv_atomic(df, path):
    """
    Write df to path through a sibling temp file.

    os.replace swaps the finished file into place, so an interrupted run
    cannot leave a truncated CSV at the destination.
    """
    tmp_path = f"{path}.tmp"
    df.to_csv(tmp_path, index=False)
    os.replace(tmp_path, path)


def _load_existing(path):
    """
    Load an existing per-ticker CSV.

    Returns (df_old, last_date), or (None, None) when there is no usable
    history: missing file, zero-byte file, header-only file, or a "date"
    column without any parseable value.
    """
    if not os.path.exists(path):
        return None, None

    try:
        df_old = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # zero-byte file (no header at all): treat as "no history"
        return None, None

    if df_old.empty:
        return None, None

    df_old["date"] = pd.to_datetime(df_old["date"])
    last_date = df_old["date"].max()
    if pd.isna(last_date):
        return None, None

    return df_old, last_date


print("\n=== INCREMENTAL SHARADAR SEP UPDATE ===\n")

# Loop-invariant reference date for the staleness check.
today = pd.Timestamp.today().normalize()

for tk in all_tickers:

    file_path = os.path.join(INPUT_FOLDER, f"{tk}.csv")
    df_old, last_date = _load_existing(file_path)

    # -------------------------------
    # CASE 1: No usable existing file → full download
    # -------------------------------
    if df_old is None:
        print(f"[NEW] {tk}: downloading FULL history...")

        df = nasdaqdatalink.get_table(
            "SHARADAR/SEP",
            ticker=[tk],
            paginate=True
        )

        if df is None or df.empty:
            print(f"⚠ No data found for {tk}")
            continue

        df["date"] = pd.to_datetime(df["date"])
        df = df.sort_values("date").drop_duplicates(subset=["date"])

        _write_csv_atomic(df, file_path)
        continue

    # -------------------------------
    # CASE 2: Incremental update
    # -------------------------------
    days_stale = (today - last_date).days

    if days_stale > MAX_STALE_DAYS:
        print(f"⚠ WARNING: {tk} last update {days_stale} days ago (last date {last_date.date()})")

    print(f"[UPDATE] {tk}: last = {last_date.date()} → checking for new rows...")

    df_new = get_incremental_sep(tk, last_date)

    if df_new.empty:
        print("    → No new data")
        continue

    print(f"    → Appending {len(df_new)} new rows (through {df_new['date'].max().date()})")

    # merge and dedupe; on a duplicate date the already-stored row wins
    # NOTE(review): columns other than "date" come back from the CSV as plain
    # strings while freshly fetched columns may be typed — confirm the written
    # CSV keeps a consistent format for any extra date-like columns.
    df_updated = pd.concat([df_old, df_new], ignore_index=True)
    df_updated = df_updated.sort_values("date").drop_duplicates(subset=["date"])

    _write_csv_atomic(df_updated, file_path)

print("\n=== DONE: Incremental update complete ===")



=== INCREMENTAL SHARADAR SEP UPDATE ===

[UPDATE] A: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[UPDATE] AAL: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[UPDATE] AAMRQ: last = 2013-12-05 → checking for new rows...
    → No new data
[UPDATE] AAP: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[UPDATE] AAPL: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[UPDATE] ABBV: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[UPDATE] ABI1: last = 2008-11-21 → checking for new rows...
    → No new data
[UPDATE] ABKFQ: last = 2010-11-16 → checking for new rows...
    → No new data
[UPDATE] ABMD: last = 2022-12-21 → checking for new rows...
    → No new data
[UPDATE] ABNB: last = 2025-12-24 → checking for new rows...
    → Appending 3 new rows (through 2025-12-30)
[