In [6]:
#!/usr/bin/env python3

import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance curl_cffi
from curl_cffi import requests
import yfinance as yf

# -------------------- Config --------------------

# Stocks & cryptos
# TICKERS (stocks followed by cryptos) is the list main() iterates over to
# download one year of daily closing prices per symbol.
STOCKS  = ["NVDA", "TER", "PAYC", "KO", "PEP"]
CRYPTOS = ["BTC-USD", "ETH-USD", "ADA-USD", "ATOM-USD", "QNT-USD", "LTC-USD", "TON11419-USD"]
TICKERS = STOCKS + CRYPTOS

# Symbols passed to fetch_fundamentals(). Note this list is maintained
# separately from STOCKS (it also contains TGT and COST).
FUND_TICKERS = ["NVDA", "PAYC", "TER", "TGT", "COST", "KO", "PEP"]

# Sector label written next to each symbol's fundamentals; symbols missing
# from this map get a sector of None (looked up with .get()).
SECTOR_MAP = {
    "NVDA": "Technology",
    "PAYC": "Technology",
    "TER":  "Technology",
    "TGT":  "Consumer Defensive",
    "COST": "Consumer Defensive",
    "KO":   "Consumer Defensive",
    "PEP":  "Consumer Defensive",
}

# Date window
# Evaluated once, when the module is loaded: the 365 days ending "now".
# main() formats both ends as YYYY-MM-DD before requesting history.
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
# Can be overridden through the DATA_BASE_DIR environment variable (see main()).
# main() also passes this directory to git_commit_and_push() as the repo root.
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data"


# Rate limiting
# SLEEP_BETWEEN_TICKERS_SEC: pause after each ticker in main()'s price loop.
# MAX_RETRIES / RETRY_DELAY_SEC: attempts per symbol and the pause between
# failed attempts (used by main() and forwarded to fetch_fundamentals()).
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
# guard_path() raises RuntimeError when a normalized path contains any of them.
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    """
    Stage, commit and push changes under ``current_dir`` and ``run_dir``.

    Every git command runs with ``cwd=repo_root`` so the process-wide working
    directory is never changed. Nothing is committed or pushed when
    ``git status`` reports no changes for the two paths.

    Args:
        repo_root: Directory of the git working tree; used as cwd for git.
        run_dir: Run folder to stage (made relative to ``repo_root``).
        current_dir: "current" folder to stage (made relative to ``repo_root``).
        branch: Branch name pushed to the ``origin`` remote.

    Returns:
        None. Progress and problems are reported with ``print``.

    Raises:
        subprocess.CalledProcessError: if ``git add`` or ``git push`` exits
            with a non-zero status.
    """
    # Pathspecs are given relative to the repo root, which is git's cwd below.
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    def run_git(*args, **kwargs):
        # Single place that pins the working directory for all git calls.
        return subprocess.run(["git", *args], cwd=repo_root, **kwargs)

    # Only commit if there are changes in current/ or today's run folder.
    try:
        status = run_git(
            "status", "--porcelain", "--", rel_cur, rel_run,
            capture_output=True, text=True,
        )
    except OSError as exc:
        # git executable missing, or repo_root is not an accessible directory.
        print(f"⚠️  Could not run git ({exc}); not pushing.")
        return
    if status.returncode != 0:
        print("⚠️  git status failed; not pushing.")
        return
    if status.stdout.strip() == "":
        print("ℹ️  No changes to commit; skipping push.")
        return

    # Check (never modify) the commit identity; warn early if it is missing
    # instead of echoing the configured e-mail/name to stdout.
    for key in ("user.email", "user.name"):
        ident = run_git("config", "--get", key, capture_output=True, text=True)
        if ident.returncode != 0 or not ident.stdout.strip():
            print(f"⚠️  git {key} is not set; the commit may fail.")

    # Stage just what we care about ("--" keeps paths from being read as options).
    run_git("add", "--", rel_cur, rel_run, check=True)

    # Commit
    msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    commit = run_git("commit", "-m", msg, capture_output=True, text=True)
    if commit.returncode != 0:
        # Likely a "nothing to commit" race; report git's output and bail.
        print(commit.stdout or commit.stderr or "ℹ️  Nothing to commit.")
        return

    # Push
    run_git("push", "origin", branch, check=True)
    print("✅ Pushed to origin; GitHub Action will purge jsDelivr cache.")

def fetch_fundamentals(symbols, yf_session, max_retries=3, retry_delay=5, spacing_sec=2):
    """
    Returns a dict {SYM: {eps, pe, div_y, sector}}.
    Uses yfinance .info (can be slow/flaky; retried for robustness).

    Args:
        symbols: Iterable of ticker symbols to look up.
        yf_session: Session object handed to ``yf.Ticker(..., session=...)``.
        max_retries: Maximum number of attempts per symbol.
        retry_delay: Seconds to sleep between failed attempts.
        spacing_sec: Seconds to sleep between consecutive symbols
            (no sleep after the last one).

    Returns:
        Dict keyed by symbol. A symbol whose attempts all failed maps to
        ``{"error": <message>, "sector": ...}`` instead of the data fields.
    """
    symbols = list(symbols)  # accept any iterable; we need to know which one is last
    last_index = len(symbols) - 1
    out = {}
    for index, sym in enumerate(symbols):
        print(f"⏳ Fetching fundamentals for {sym}…")
        last_err = None
        for attempt in range(1, max_retries + 1):
            try:
                t = yf.Ticker(sym, session=yf_session)
                info = t.info  # may be slow / rate-limited
                eps = info.get("trailingEps")
                price = info.get("regularMarketPrice")
                # P/E is only computed when both inputs are present and non-zero.
                pe = round(price / eps, 1) if eps and price else None
                dy = info.get("dividendYield")
                if dy is not None:
                    # NOTE(review): assumes dividendYield is a fraction (0.03 == 3%);
                    # confirm against the installed yfinance version.
                    dy = round(dy * 100, 1)
                out[sym] = {
                    "eps": eps,
                    "pe": pe,
                    "div_y": dy,
                    "sector": SECTOR_MAP.get(sym),
                }
                last_err = None
                break
            except Exception as e:
                # Some exceptions have an empty message; fall back to repr() so
                # the failure is still visible and still recorded below.
                last_err = str(e) or repr(e)
                print(f"   ⚠️  Attempt {attempt}/{max_retries} failed: {last_err}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
        if last_err is not None:
            # record partial error so you see it in summary.json
            out[sym] = {"error": last_err, "sector": SECTOR_MAP.get(sym)}
        if index < last_index:
            time.sleep(spacing_sec)  # small spacing to be polite
    return out


def guard_path(path: str):
    """Raise RuntimeError if the normalized ``path`` contains a forbidden substring.

    The check is a plain substring match of every FORBIDDEN_SUBSTRINGS entry
    against ``os.path.normpath(path)``. Returns None when the path is allowed.
    """
    norm = os.path.normpath(path)
    if any(bad in norm for bad in FORBIDDEN_SUBSTRINGS):
        raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    """Create ``path`` (including parents) after it has passed guard_path().

    An already existing directory is not an error. guard_path() raises
    RuntimeError for forbidden paths, in which case nothing is created.
    """
    guard_path(path)
    os.makedirs(name=path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    """Create (if needed) and return ``<base_dir>/runs/run_<YYYY-MM-DD>`` for today.

    The date comes from the local clock at call time, so repeated calls on the
    same day return the same folder.
    """
    runs_root = os.path.join(base_dir, "runs")
    date_str = datetime.now().strftime("%Y-%m-%d")
    todays_dir = os.path.join(runs_root, f"run_{date_str}")
    safe_mkdirs(todays_dir)
    return todays_dir


def ensure_current_dir(base_dir: str) -> str:
    """Create (if needed) and return ``<base_dir>/current``.

    The folder sits directly under ``base_dir``, as a sibling of ``runs/``
    rather than inside it.
    """
    current_path = os.path.join(base_dir, "current")
    safe_mkdirs(current_path)
    return current_path

def copy_to_current(src_path: str, current_dir: str):
    """Copy ``src_path`` into ``current_dir`` under the same file name.

    Args:
        src_path: File to copy.
        current_dir: Destination directory; checked with guard_path() first.

    Returns:
        The destination path.

    Raises:
        RuntimeError: if ``current_dir`` is a forbidden path (from guard_path()).
        OSError: if the source cannot be read or the destination written.
    """
    import shutil  # local import: only this function needs it

    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))

    # Opening the destination for writing truncates it. If source and
    # destination are the same file, that would destroy the data before it is
    # read, so there is nothing to do in that case.
    if os.path.exists(dst_path) and os.path.samefile(src_path, dst_path):
        return dst_path

    # Streams the content instead of loading the whole file into memory.
    shutil.copyfile(src_path, dst_path)
    return dst_path



def write_json(path: str, obj):
    """Serialize ``obj`` as JSON (2-space indent) to ``path``, replacing any existing file.

    The path is checked with guard_path() before the file is opened, so a
    forbidden path raises RuntimeError and nothing is written.
    """
    guard_path(path)
    with open(path, mode="w") as handle:
        json.dump(obj, handle, indent=2)

def create_or_update_symlink(target_dir: str, link_path: str):
    """Point ``link_path`` at ``target_dir``, replacing an existing link or file.

    Best-effort convenience (e.g. a 'latest' link): failures are reported with
    a warning and never raised, because some filesystems do not support
    symlinks. An existing real directory at ``link_path`` is left untouched,
    since ``os.remove`` refuses to delete directories.

    Args:
        target_dir: Path the symlink should point to.
        link_path: Location of the symlink to create or refresh.

    Returns:
        None.
    """
    try:
        # lexists() is also true for a dangling symlink, which must be removed
        # before a new link can be created at the same location.
        if os.path.lexists(link_path):
            os.remove(link_path)
        os.symlink(target_dir, link_path)
    except (OSError, NotImplementedError) as exc:
        # Non-fatal if symlink fails (e.g., on certain filesystems)
        print(f"⚠️  Could not update symlink {link_path}: {exc}")

# -------------------- Main --------------------

def _close_points(df):
    """Turn a price-history frame into ``[[epoch_ms, close], ...]``, skipping missing closes."""
    if "Close" not in df.columns:
        return []
    # Missing closes come back as NaN; json.dump would emit them as the
    # non-standard token NaN, which strict JSON parsers reject, so drop them.
    closes = df["Close"].dropna()
    return [
        [int(ts.timestamp() * 1000), round(float(close), 2)]
        for ts, close in closes.items()
    ]


def main():
    """
    Download price history and fundamentals, write them as JSON, then push.

    Steps:
      1. Resolve the base directory (DATA_BASE_DIR env var or DEFAULT_BASE_DIR)
         and create today's run folder plus the ``current`` folder.
      2. For every symbol in TICKERS, fetch daily history for START..END
         (retrying up to MAX_RETRIES times) and write ``<symbol>_365d.json``
         to the run folder and to ``current``.
      3. Fetch fundamentals for FUND_TICKERS and write ``fundamentals.json``
         and ``summary.json`` to both folders.
      4. Commit and push both folders via git_commit_and_push().

    Per-symbol failures are collected in the summary's ``errors`` list and do
    not stop the run.
    """
    session = requests.Session(impersonate="chrome124")
    yf_session = session  # yfinance accepts a curl_cffi session via 'session'

    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"📁 Daily archive folder: {run_dir}")
    print(f"📂 Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    for sym in TICKERS:
        print(f"⏳ Fetching 365-day data for {sym}…")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym, session=yf_session)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = _close_points(df)
                if not pts:
                    raise ValueError("No valid close prices found.")

                # File stem: drop the "-USD" suffix and dots, lower-case the rest.
                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                # Write to daily run folder
                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                # Also copy to current folder for Qualtrics
                copy_to_current(out_path, current_dir)

                print(f" ✅ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                # Some exceptions have an empty message; fall back to repr() so
                # the failure is still recorded in the summary below.
                last_err = str(e) or repr(e)
                print(f"   ⚠️ Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err is not None:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # --- fundamentals ---
    funds = fetch_fundamentals(FUND_TICKERS, yf_session, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY_SEC)

    # save fundamentals.json in both places
    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    # add to summary
    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
    }

    # Save summary to both locations
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # Auto-commit & push (triggers purge workflow)
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("🏁 Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()

📁 Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-08-10
📂 Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current
⏳ Fetching 365-day data for NVDA…
 ✅ Wrote nvda_365d.json (249 points)
⏳ Fetching 365-day data for TER…
 ✅ Wrote ter_365d.json (249 points)
⏳ Fetching 365-day data for PAYC…
 ✅ Wrote payc_365d.json (249 points)
⏳ Fetching 365-day data for KO…
 ✅ Wrote ko_365d.json (249 points)
⏳ Fetching 365-day data for PEP…
 ✅ Wrote pep_365d.json (249 points)
⏳ Fetching 365-day data for BTC-USD…
 ✅ Wrote btc_365d.json (364 points)
⏳ Fetching 365-day data for ETH-USD…
 ✅ Wrote eth_365d.json (364 points)
⏳ Fetching 365-day data for ADA-USD…
 ✅ Wrote ada_365d.json (364 points)
⏳ Fetching 365-day data for ATOM-USD…
 ✅ Wrote atom_365d.json (364 points)
⏳ Fetching 365-day data for QNT-USD…
 ✅ Wrote qnt_365d.json (36

To github.com:pagrass/pilot1-asset-data.git
   0c793f0..45eb83c  main -> main
