# Update Script
This notebook orchestrates data downloads and analysis refreshes.

In [3]:
# ========== Bootstrap: ensure required Python packages are present ==========
import importlib, subprocess, sys

def _ensure(pkg_name: str, import_name: str | None = None):
    """
    Import `import_name` (defaults to `pkg_name`); if that fails, pip‑install.
    """
    try:
        importlib.import_module(import_name or pkg_name)
    except ModuleNotFoundError:
        print(f"Package '{pkg_name}' not found — installing …")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg_name])
    finally:
        globals()[import_name or pkg_name] = importlib.import_module(import_name or pkg_name)

# --- Required third‑party libraries ------------------------------------------
_ensure("pandas")
_ensure("requests")
print("All dependencies ready.\n")

# --- Standard imports --------------------------------------------------------
from pathlib import Path
import datetime as dt
import os, re, shutil, json
import pandas as pd, requests, urllib.parse

# --- Helper: replace [date %Y-%m-%d] tokens -----------------------------------
def substitute_date_tokens(url: str) -> str:
    def _replace(m):
        fmt = m.group(1).strip()
        return dt.date.today().strftime(fmt)
    return re.sub(r"\[date\s+([^\]]+)\]", _replace, url)

# --- Helper: append FRED API key if available --------------------------------
API_KEY = os.getenv("FRED_API_KEY")     # set in your shell or systemd unit

def add_apikey(url: str) -> str:
    if API_KEY:
        sep = "&" if "?" in url else "?"
        return f"{url}{sep}api_key={urllib.parse.quote_plus(API_KEY)}"
    return url

# --- Cadence map (word → minimum days between fetches) ------------------------
CADENCE_DAYS = {
    "daily": 1,
    "weekly": 7,
    "monthly": 30,
    "quarterly": 90,
}

# --- Resolve base directory so notebook works from repo root or data folder ---
BASE_DIR = Path.cwd() if Path('catalog.csv').exists() else Path.cwd() / 'data'

# --- Load catalog -------------------------------------------------------------
catalog_path = BASE_DIR / 'catalog.csv'
cat = pd.read_csv(catalog_path)

today = dt.date.today()
updated_rows = []                # remember which rows we refresh

for idx, row in cat.iterrows():
    folder = BASE_DIR / str(row['folder'])
    folder.mkdir(parents=True, exist_ok=True)

    latest_fp = folder / "latest.csv"
    last_fetched = (
        pd.to_datetime(row["last_fetched"]).date()
        if pd.notna(row["last_fetched"]) else None
    )

    # ---- Determine if an update is due --------------------------------------
    cadence = str(row["cadence"]).lower().strip()
    min_age = CADENCE_DAYS.get(cadence, 30)        # default 30 days
    needs_update = (
        (not latest_fp.exists()) or
        (not last_fetched) or
        (today - last_fetched).days >= min_age
    )

    if not needs_update:
        continue

    # ---- Build the request URL ---------------------------------------------
    url = substitute_date_tokens(str(row["url"]))
    url = add_apikey(url)

    print(f"Fetching {row['folder']} …", end=" ")
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # ---- Save snapshot and latest --------------------------------------
        dated_fp = folder / f"{today:%Y-%m-%d}.csv"
        dated_fp.write_bytes(r.content)
        shutil.copyfile(dated_fp, latest_fp)

        # ---- Mark success in catalog ---------------------------------------
        cat.at[idx, "last_fetched"] = today.isoformat()
        updated_rows.append(row["folder"])
        print("✓ success")
    except Exception as e:
        print(f"✗ failed: {e}")

# --- Persist catalog if anything changed -------------------------------------
if updated_rows:
    cat.to_csv(catalog_path, index=False)
    print("\nUpdated:", ", ".join(updated_rows))
else:
    print("Everything up to date.")


All dependencies ready.



OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'data\\https:\\api.stlouisfed.org\\fred\\series\\observations?series_id=GDPC1&file_type=json&observation_end=[date %Y-%m-%d]'