In [None]:
#!/usr/bin/env python3

import csv
import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance
import yfinance as yf

# -------------------- Config --------------------

# Stocks: symbols whose daily price history main() downloads via yfinance.
STOCKS  = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]
# TICKERS is what main() iterates; it is the same list object as STOCKS.
TICKERS = STOCKS

# Symbols looked up in the metrics CSV and written to fundamentals.json.
FUND_TICKERS = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

# Sector label attached to each symbol's entry in fundamentals.json.
SECTOR_MAP = {
    "CSCO": "Information Technology",
    "TMUS": "Communication Services",
    "TWLO": "Information Technology",
    "PEGA": "Information Technology",
    "ROG":  "Information Technology",
    "PD":   "Information Technology",
}

# Date window: the trailing 365 days, evaluated once when this cell/module is executed.
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here).
# main() lets the DATA_BASE_DIR environment variable override this value.
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data"

# Path to the CSV "dictionary" that fetch_metrics_from_csv() reads, keyed by ticker.
# NOTE(review): absolute, user-specific path with no environment override.
CSV_METRICS_PATH = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Model Spillovers/Data/Stock Selection/preselection/candidate_subset_all.csv"

# Rate limiting (all values in seconds, except the retry count)
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings.
# guard_path() raises if a normalized path contains any of them.
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    """Stage, commit, and push changes under ``current_dir`` and ``run_dir``.

    Nothing is committed when ``git status`` fails or reports no changes for
    the two paths, and nothing is pushed when ``git commit`` exits non-zero;
    in each of those cases a message is printed and the function returns.

    Args:
        repo_root: Root of the git working tree; every git command runs there.
        run_dir: Today's run folder (must be located under ``repo_root``).
        current_dir: The "current" folder (must be located under ``repo_root``).
        branch: Branch name pushed to the ``origin`` remote.

    Raises:
        subprocess.CalledProcessError: If ``git add`` or ``git push`` fails.
    """
    # Only the current/ folder and today's run folder are ever staged.
    paths = [
        os.path.relpath(current_dir, repo_root),
        os.path.relpath(run_dir, repo_root),
    ]

    def git(*args, **kwargs):
        # cwd= scopes the working directory to the child process, so the
        # interpreter's own cwd is never changed (os.chdir is process-wide).
        return subprocess.run(["git", *args], cwd=repo_root, **kwargs)

    # "--" keeps the paths from ever being parsed as options or revisions.
    status = git("status", "--porcelain", "--", *paths, capture_output=True, text=True)
    if status.returncode != 0:
        print("⚠️  git status failed; not pushing.")
        return
    if status.stdout.strip() == "":
        print("ℹ️  No changes to commit; skipping push.")
        return

    # Stage just what we care about
    git("add", "--", *paths, check=True)

    # Commit
    msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    commit = git("commit", "-m", msg, capture_output=True, text=True)
    if commit.returncode != 0:
        print(commit.stdout or commit.stderr or "ℹ️  Nothing to commit.")
        return

    # Push
    git("push", "origin", branch, check=True)
    print("✅ Pushed to origin; GitHub Action will purge jsDelivr cache.")

def guard_path(path: str):
    """Raise ``RuntimeError`` when the normalized ``path`` contains a forbidden substring."""
    normalized = os.path.normpath(path)
    # Plain substring test against every entry of FORBIDDEN_SUBSTRINGS.
    if any(token in normalized for token in FORBIDDEN_SUBSTRINGS):
        raise RuntimeError(f"Refusing to write into forbidden path: {normalized}")

def safe_mkdirs(path: str):
    """Create ``path`` (parents included) once it has passed ``guard_path``."""
    # The guard runs first so a forbidden location is never created.
    guard_path(path)
    # An already existing directory is not an error.
    os.makedirs(name=path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    """Create and return ``<base_dir>/runs/run_<YYYY-MM-DD>`` for today's date."""
    folder_name = "run_" + datetime.now().strftime("%Y-%m-%d")
    target = os.path.join(base_dir, "runs", folder_name)
    safe_mkdirs(target)
    return target

def ensure_current_dir(base_dir: str) -> str:
    """Create (if needed) and return the ``current_day2`` folder under ``base_dir``."""
    current_path = os.path.join(base_dir, "current_day2")
    safe_mkdirs(current_path)
    return current_path

def copy_to_current(src_path: str, current_dir: str):
    """Copy ``src_path`` byte-for-byte into ``current_dir`` and return the new path.

    The copy keeps the source file's base name; ``current_dir`` is checked
    with ``guard_path`` before anything is opened.
    """
    guard_path(current_dir)
    file_name = os.path.basename(src_path)
    target = os.path.join(current_dir, file_name)
    with open(src_path, "rb") as reader, open(target, "wb") as writer:
        payload = reader.read()
        writer.write(payload)
    return target

def write_json(path: str, obj):
    """Serialize ``obj`` as 2-space-indented JSON to ``path`` (guarded by ``guard_path``)."""
    guard_path(path)
    with open(path, "w") as handle:
        json.dump(obj, handle, indent=2)

# ---------- CSV metrics ----------

def _coerce_number(val):
    """Coerce CSV field to float if possible; return None for empty/invalid."""
    if val is None:
        return None
    s = str(val).strip()
    if s == "" or s.lower() in {"na", "nan", "none"}:
        return None
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def fetch_metrics_from_csv(symbols, csv_path, sector_map=None):
    """
    Pull marketcap, pb_current, pb_current_pctile, div_y from a CSV keyed by ticker.

    Post-processing:
      - marketcap -> divide by 1,000,000 and round to 2 decimals (millions)
      - pb_current_pctile -> round to 2 decimals
      - div_y (aka div_yield/dividend_yield) -> divide by 100 (to decimal)
      - valuation (new): Low / Mid / High based on pb_current_pctile
    """
    lookup = {}
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_lower = {k.lower(): v for k, v in row.items()}
            tk = (row_lower.get("ticker") or row_lower.get("symbol") or "").strip().upper()
            if tk:
                lookup[tk] = row_lower

    out = {}
    for sym in symbols:
        key = sym.upper()
        row = lookup.get(key)
        if not row:
            out[key] = {"error": "Ticker not found in CSV", "sector": (sector_map or {}).get(key)}
            continue

        mc_raw = _coerce_number(row.get("marketcap"))
        pb = _coerce_number(row.get("pb_current"))
        pb_pct_raw = _coerce_number(row.get("pb_current_pctile"))

        # Flexible dividend yield column handling
        div_candidates = ["div_y", "div_yield", "dividend_yield"]
        div_raw = None
        for c in div_candidates:
            if c in row:
                div_raw = _coerce_number(row.get(c))
                if div_raw is not None:
                    break

        # ---- Post-processing transforms ----
        mc_millions = round(mc_raw / 1_000_000_000, 2) if mc_raw is not None else None
        pb_pct = round(pb_pct_raw * 100, 0) if pb_pct_raw is not None else None
        div_val = (div_raw) if div_raw is not None else None

        # ---- Valuation classification ----
        if pb_pct is None:
            valuation = None
        elif pb_pct < 0.33:
            valuation = "Low"
        elif pb_pct < 0.67:
            valuation = "Mid"
        else:
            valuation = "High"

        out[key] = {
            "marketcap": mc_millions,
            "pb_current": pb,
            "pb_current_pctile": pb_pct,
            "div_y": div_val,
            "valuation": valuation,   # 👈 new field
        }
        if sector_map:
            out[key]["sector"] = sector_map.get(key)

    return out


# -------------------- Main --------------------

def main():
    """Download price history, export CSV metrics, write summaries, and push.

    Steps, in order:
      1. Resolve the base directory (``DATA_BASE_DIR`` environment variable,
         falling back to ``DEFAULT_BASE_DIR``) and create today's run folder
         and the current folder.
      2. For every symbol in ``TICKERS``, fetch daily closes for the
         ``START``..``END`` window via yfinance (up to ``MAX_RETRIES``
         attempts), write ``<symbol>_365d.json`` into the run folder and copy
         it into the current folder.
      3. Build ``fundamentals.json`` from the metrics CSV and write it to both
         folders.
      4. Write ``summary.json`` to both folders, then commit and push.
    """
    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"📁 Daily archive folder: {run_dir}")
    print(f"📂 Current folder for Qualtrics: {current_dir}")

    window_start = START.strftime("%Y-%m-%d")
    window_end = END.strftime("%Y-%m-%d")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": window_start,
        "window_end": window_end,
        "files": [],
        "errors": [],
    }

    # ---- Price history (yfinance) ----
    last_index = len(TICKERS) - 1
    for index, sym in enumerate(TICKERS):
        print(f"⏳ Fetching 365-day data for {sym}…")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym)
                df = tkr.history(
                    start=window_start,
                    end=window_end,
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                # Missing closes arrive as NaN, not None. Drop them, otherwise
                # json.dump emits bare NaN tokens that strict JSON parsers reject.
                closes = df["Close"].dropna() if "Close" in df.columns else None
                if closes is None or closes.empty:
                    raise ValueError("No valid close prices found.")

                # [epoch milliseconds, close rounded to cents]
                pts = [
                    [int(ts.timestamp() * 1000), round(float(price), 2)]
                    for ts, price in closes.items()
                ]

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                copy_to_current(out_path, current_dir)

                print(f" ✅ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                # Any failure (network, parsing, writing) triggers a retry.
                last_err = str(e)
                print(f"   ⚠️ Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        # Pause between tickers only; no remote request follows the last one.
        if index < last_index:
            time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # ---- Metrics from CSV (no Yahoo fundamentals) ----
    try:
        funds = fetch_metrics_from_csv(
            FUND_TICKERS,
            CSV_METRICS_PATH,
            sector_map=SECTOR_MAP
        )
    except Exception as e:
        # A broken or missing CSV must not discard the price files already written.
        funds = {sym: {"error": f"CSV read failed: {e}", "sector": SECTOR_MAP.get(sym)} for sym in FUND_TICKERS}

    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
        "source": "csv",
        "csv_path": CSV_METRICS_PATH,
        "fields": ["marketcap", "pb_current", "pb_current_pctile", "div_y", "valuation", "sector"]
    }

    # ---- Save summary ----
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # ---- Auto-commit & push ----
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("🏁 Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")


if __name__ == "__main__":
    main()


📁 Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-10-21
📂 Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2
⏳ Fetching 365-day data for CSCO…
 ✅ Wrote csco_365d.json (250 points)
⏳ Fetching 365-day data for TMUS…
 ✅ Wrote tmus_365d.json (250 points)
⏳ Fetching 365-day data for TWLO…
 ✅ Wrote twlo_365d.json (250 points)
⏳ Fetching 365-day data for PEGA…
 ✅ Wrote pega_365d.json (250 points)
⏳ Fetching 365-day data for ROG…
 ✅ Wrote rog_365d.json (250 points)
⏳ Fetching 365-day data for PD…
 ✅ Wrote pd_365d.json (250 points)
✅ Pushed to origin; GitHub Action will purge jsDelivr cache.
🏁 Done.
All symbols fetched successfully.
Latest 'current' folder ready for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2


To github.com:pagrass/pilot1-asset-data.git
   2594ed8..5f50daa  main -> main


In [None]:
#!/usr/bin/env python3

import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance curl_cffi
from curl_cffi import requests
import yfinance as yf

# -------------------- Config --------------------

# Stocks (crypto tickers are currently disabled)
STOCKS  = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

#CRYPTOS = ["BTC-USD", "ETH-USD", "XMR-USD", "APT21794-USD", "QNT-USD", "TON11419-USD", "DOT-USD"]
# Symbols whose price history main() downloads.
TICKERS = STOCKS
#+ CRYPTOS

# Symbols passed to fetch_fundamentals().
FUND_TICKERS = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

# Sector label attached to each symbol's entry in fundamentals.json.
SECTOR_MAP = {
    "CSCO": "Information Technology",
    "TMUS": "Communication Services",
    "TWLO": "Information Technology",
    "PEGA": "Information Technology",
    "ROG": "Information Technology",
    "PD": "Information Technology",
}

# Date window: the trailing 365 days, evaluated once when this cell/module is executed.
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here).
# main() lets the DATA_BASE_DIR environment variable override this value.
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot4-asset-data"


# Rate limiting (all values in seconds, except the retry count)
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings.
# guard_path() raises if a normalized path contains any of them.
FORBIDDEN_SUBSTRINGS = ["pilot3-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    """Stage, commit, and push changes under ``current_dir`` and ``run_dir``.

    Nothing is committed when ``git status`` fails or reports no changes for
    the two paths, and nothing is pushed when ``git commit`` exits non-zero;
    in each of those cases a message is printed and the function returns.

    Args:
        repo_root: Root of the git working tree; every git command runs there.
        run_dir: Today's run folder (must be located under ``repo_root``).
        current_dir: The "current" folder (must be located under ``repo_root``).
        branch: Branch name pushed to the ``origin`` remote.

    Raises:
        subprocess.CalledProcessError: If ``git add`` or ``git push`` fails.
    """
    # Only the current/ folder and today's run folder are ever staged.
    paths = [
        os.path.relpath(current_dir, repo_root),
        os.path.relpath(run_dir, repo_root),
    ]

    def git(*args, **kwargs):
        # cwd= scopes the working directory to the child process, so the
        # interpreter's own cwd is never changed (os.chdir is process-wide).
        return subprocess.run(["git", *args], cwd=repo_root, **kwargs)

    # Check if there are changes to these paths.
    # "--" keeps the paths from ever being parsed as options or revisions.
    status = git("status", "--porcelain", "--", *paths, capture_output=True, text=True)
    if status.returncode != 0:
        print("⚠️  git status failed; not pushing.")
        return
    if status.stdout.strip() == "":
        print("ℹ️  No changes to commit; skipping push.")
        return

    # Report a missing identity up front (nothing is ever written to git
    # config here); output is captured so the values are not echoed.
    for key in ("user.email", "user.name"):
        probe = git("config", "--get", key, capture_output=True, text=True)
        if probe.returncode != 0 or not probe.stdout.strip():
            print(f"⚠️  git {key} is not configured; the commit may fail.")

    # Stage just what we care about
    git("add", "--", *paths, check=True)

    # Commit
    msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    commit = git("commit", "-m", msg, capture_output=True, text=True)
    if commit.returncode != 0:
        # Likely a "nothing to commit" race; report git's own output and stop.
        print(commit.stdout or commit.stderr or "ℹ️  Nothing to commit.")
        return

    # Push
    git("push", "origin", branch, check=True)
    print("✅ Pushed to origin; GitHub Action will purge jsDelivr cache.")

def fetch_fundamentals(symbols, yf_session, max_retries=3, retry_delay=5):
    """
    Build ``{SYM: {eps, pe, div_y, sector}}`` from yfinance ``.info``.

    Every symbol is attempted up to ``max_retries`` times, sleeping
    ``retry_delay`` seconds between failed attempts. A symbol whose last
    attempt failed maps to ``{"error": <message>, "sector": ...}`` instead, so
    the failure is visible wherever the returned dict is written out.

    ``pe`` is ``regularMarketPrice / trailingEps`` rounded to 1 decimal, or
    ``None`` when either input is missing or zero. ``div_y`` is
    ``dividendYield * 100`` rounded to 1 decimal.
    NOTE(review): this assumes ``dividendYield`` is reported as a fraction --
    confirm against the installed yfinance version.
    """
    results = {}
    for ticker in symbols:
        print(f"⏳ Fetching fundamentals for {ticker}…")
        failure = None
        for attempt in range(1, max_retries + 1):
            try:
                # .info may be slow / rate-limited
                details = yf.Ticker(ticker, session=yf_session).info
                eps = details.get("trailingEps")
                price = details.get("regularMarketPrice")
                if eps and price:
                    pe = round(price / eps, 1)
                else:
                    pe = None
                dividend = details.get("dividendYield")
                if dividend is not None:
                    dividend = round(dividend * 100, 1)
                results[ticker] = {
                    "eps": eps,
                    "pe": pe,
                    "div_y": dividend,
                    "sector": SECTOR_MAP.get(ticker),
                }
            except Exception as exc:
                failure = str(exc)
                print(f"   ⚠️  Attempt {attempt}/{max_retries} failed: {failure}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
            else:
                # Success: forget errors from earlier attempts and stop retrying.
                failure = None
                break
        if failure:
            # Replace any partial entry with the error of the final attempt.
            results[ticker] = {"error": failure, "sector": SECTOR_MAP.get(ticker)}
        time.sleep(2)  # small spacing to be polite
    return results


def guard_path(path: str):
    """Raise ``RuntimeError`` when the normalized ``path`` contains a forbidden substring."""
    normalized = os.path.normpath(path)
    # Plain substring test against every entry of FORBIDDEN_SUBSTRINGS.
    if any(token in normalized for token in FORBIDDEN_SUBSTRINGS):
        raise RuntimeError(f"Refusing to write into forbidden path: {normalized}")

def safe_mkdirs(path: str):
    """Create ``path`` (parents included) once it has passed ``guard_path``."""
    # The guard runs first so a forbidden location is never created.
    guard_path(path)
    # An already existing directory is not an error.
    os.makedirs(name=path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    """Create and return ``<base_dir>/runs/run_<YYYY-MM-DD>`` for today's date."""
    folder_name = "run_" + datetime.now().strftime("%Y-%m-%d")
    target = os.path.join(base_dir, "runs", folder_name)
    safe_mkdirs(target)
    return target


def ensure_current_dir(base_dir: str) -> str:
    """Create (if needed) and return the ``current_day2`` folder.

    The folder sits directly under ``base_dir``, not inside ``runs/``.
    """
    current_path = os.path.join(base_dir, "current_day2")
    safe_mkdirs(current_path)
    return current_path

def copy_to_current(src_path: str, current_dir: str):
    """Copy ``src_path`` byte-for-byte into ``current_dir`` and return the new path.

    The copy keeps the source file's base name; ``current_dir`` is checked
    with ``guard_path`` before anything is opened.
    """
    guard_path(current_dir)
    file_name = os.path.basename(src_path)
    target = os.path.join(current_dir, file_name)
    with open(src_path, "rb") as reader, open(target, "wb") as writer:
        payload = reader.read()
        writer.write(payload)
    return target



def write_json(path: str, obj):
    """Serialize ``obj`` as 2-space-indented JSON to ``path`` (guarded by ``guard_path``)."""
    guard_path(path)
    with open(path, "w") as handle:
        json.dump(obj, handle, indent=2)

def create_or_update_symlink(target_dir: str, link_path: str):
    """Point ``link_path`` at ``target_dir`` via a symlink (best-effort).

    An existing file or symlink at ``link_path`` is removed first. Failures
    never propagate -- for example on filesystems without symlink support, or
    when ``link_path`` is a real directory -- but they are reported on stdout
    instead of being swallowed silently.

    Args:
        target_dir: Path the link should resolve to.
        link_path: Location of the symlink to create or refresh.
    """
    try:
        # lexists() is true for dangling symlinks too; it is equivalent to
        # "islink(path) or exists(path)".
        if os.path.lexists(link_path):
            os.remove(link_path)
        os.symlink(target_dir, link_path)
    except (OSError, NotImplementedError) as exc:
        # Non-fatal by design, but leave a trace so the stale link is noticed.
        print(f"⚠️  Could not refresh symlink {link_path}: {exc}")

# -------------------- Main --------------------

def main():
    """Fetch prices and fundamentals via yfinance, write JSON outputs, and push.

    Steps, in order:
      1. Resolve the base directory (``DATA_BASE_DIR`` environment variable,
         falling back to ``DEFAULT_BASE_DIR``) and create today's run folder
         and the current folder.
      2. For every symbol in ``TICKERS``, fetch daily closes for the
         ``START``..``END`` window (up to ``MAX_RETRIES`` attempts), write
         ``<symbol>_365d.json`` into the run folder and copy it into the
         current folder.
      3. Fetch fundamentals for ``FUND_TICKERS`` and write
         ``fundamentals.json`` to both folders.
      4. Write ``summary.json`` to both folders, then commit and push.
    """
    # yfinance accepts a curl_cffi session via 'session'
    yf_session = requests.Session(impersonate="chrome124")

    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"📁 Daily archive folder: {run_dir}")
    print(f"📂 Current folder for Qualtrics: {current_dir}")

    window_start = START.strftime("%Y-%m-%d")
    window_end = END.strftime("%Y-%m-%d")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": window_start,
        "window_end": window_end,
        "files": [],
        "errors": [],
    }

    for sym in TICKERS:
        print(f"⏳ Fetching 365-day data for {sym}…")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym, session=yf_session)
                df = tkr.history(
                    start=window_start,
                    end=window_end,
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                # Missing closes arrive as NaN, not None. Drop them, otherwise
                # json.dump emits bare NaN tokens that strict JSON parsers reject.
                closes = df["Close"].dropna() if "Close" in df.columns else None
                if closes is None or closes.empty:
                    raise ValueError("No valid close prices found.")

                # [epoch milliseconds, close rounded to cents]
                pts = [
                    [int(ts.timestamp() * 1000), round(float(price), 2)]
                    for ts, price in closes.items()
                ]

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                # Write to daily run folder
                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                # Also copy to current folder for Qualtrics
                copy_to_current(out_path, current_dir)

                print(f" ✅ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                # Any failure (network, parsing, writing) triggers a retry.
                last_err = str(e)
                print(f"   ⚠️ Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        # Spacing before the next request (the fundamentals fetch follows the last ticker).
        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # --- fundamentals ---
    funds = fetch_fundamentals(FUND_TICKERS, yf_session, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY_SEC)

    # Surface fundamentals failures in summary.json as well, so the final
    # status message cannot claim success while fundamentals.json holds errors.
    for fund_sym, payload in funds.items():
        if "error" in payload:
            run_summary["errors"].append(
                {"symbol": fund_sym, "error": f"fundamentals: {payload['error']}"}
            )

    # save fundamentals.json in both places
    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    # add to summary
    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
    }

    # Save summary to both locations
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # Auto-commit & push (triggers purge workflow)
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("🏁 Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")


if __name__ == "__main__":
    main()

📁 Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-08-19
📂 Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2
⏳ Fetching 365-day data for BTC-USD…
 ✅ Wrote btc_365d.json (365 points)
⏳ Fetching 365-day data for ETH-USD…
 ✅ Wrote eth_365d.json (365 points)
⏳ Fetching 365-day data for XMR-USD…
 ✅ Wrote xmr_365d.json (365 points)
⏳ Fetching 365-day data for APT21794-USD…
 ✅ Wrote apt21794_365d.json (365 points)
⏳ Fetching 365-day data for QNT-USD…
 ✅ Wrote qnt_365d.json (365 points)
⏳ Fetching 365-day data for TON11419-USD…
 ✅ Wrote ton11419_365d.json (365 points)
⏳ Fetching 365-day data for DOT-USD…
 ✅ Wrote dot_365d.json (365 points)
⏳ Fetching fundamentals for DOCN…
⏳ Fetching fundamentals for MSFT…
⏳ Fetching fundamentals for VZ…
⏳ Fetching fundamentals for ZS…
⏳ Fetching fundamentals for UFPI…
⏳ 

To github.com:pagrass/pilot1-asset-data.git
   79058b4..ba00fc3  main -> main


In [None]:
#!/usr/bin/env python3

import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance curl_cffi
from curl_cffi import requests
import yfinance as yf

# -------------------- Config --------------------

# Stocks & cryptos: symbols whose daily price history main() downloads.
STOCKS  = ["CSCO", "ARLO", "CMPO", "ZS", "KO", "MCD"]
#STOCKS  = []

#CRYPTOS = ["BTC-USD", "ETH-USD", "XMR-USD", "APT21794-USD", "QNT-USD", "TON11419-USD", "DOT-USD"]
CRYPTOS = []
TICKERS = STOCKS + CRYPTOS

# Symbols passed to fetch_fundamentals().
# NOTE(review): this list starts with "DOCN" while STOCKS starts with "CSCO" -- confirm that is intended.
FUND_TICKERS = ["DOCN", "ARLO", "CMPO", "ZS", "KO", "MCD"]

# Sector label attached to each symbol's fundamentals entry via SECTOR_MAP.get(sym).
# NOTE(review): ARLO, CMPO, KO and MCD have no entry here, so their "sector" is None;
# MSFT, VZ, UFPI and DY are not in FUND_TICKERS -- looks like a stale mapping, confirm.
SECTOR_MAP = {
    "DOCN": "Technology",
    "MSFT": "Technology",
    "VZ":  "Communication Services",
    "ZS":  "Technology",
    "UFPI": "Basic Materials",
    "DY":   "Industrials",
}

# Date window: the trailing 365 days, evaluated once when this cell/module is executed.
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here).
# main() lets the DATA_BASE_DIR environment variable override this value.
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data"


# Rate limiting (all values in seconds, except the retry count)
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings.
# guard_path() raises if a normalized path contains any of them.
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    """Stage, commit, and push changes under ``current_dir`` and ``run_dir``.

    Nothing is committed when ``git status`` fails or reports no changes for
    the two paths, and nothing is pushed when ``git commit`` exits non-zero;
    in each of those cases a message is printed and the function returns.

    Args:
        repo_root: Root of the git working tree; every git command runs there.
        run_dir: Today's run folder (must be located under ``repo_root``).
        current_dir: The "current" folder (must be located under ``repo_root``).
        branch: Branch name pushed to the ``origin`` remote.

    Raises:
        subprocess.CalledProcessError: If ``git add`` or ``git push`` fails.
    """
    # Only the current/ folder and today's run folder are ever staged.
    paths = [
        os.path.relpath(current_dir, repo_root),
        os.path.relpath(run_dir, repo_root),
    ]

    def git(*args, **kwargs):
        # cwd= scopes the working directory to the child process, so the
        # interpreter's own cwd is never changed (os.chdir is process-wide).
        return subprocess.run(["git", *args], cwd=repo_root, **kwargs)

    # Check if there are changes to these paths.
    # "--" keeps the paths from ever being parsed as options or revisions.
    status = git("status", "--porcelain", "--", *paths, capture_output=True, text=True)
    if status.returncode != 0:
        print("⚠️  git status failed; not pushing.")
        return
    if status.stdout.strip() == "":
        print("ℹ️  No changes to commit; skipping push.")
        return

    # Report a missing identity up front (nothing is ever written to git
    # config here); output is captured so the values are not echoed.
    for key in ("user.email", "user.name"):
        probe = git("config", "--get", key, capture_output=True, text=True)
        if probe.returncode != 0 or not probe.stdout.strip():
            print(f"⚠️  git {key} is not configured; the commit may fail.")

    # Stage just what we care about
    git("add", "--", *paths, check=True)

    # Commit
    msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    commit = git("commit", "-m", msg, capture_output=True, text=True)
    if commit.returncode != 0:
        # Likely a "nothing to commit" race; report git's own output and stop.
        print(commit.stdout or commit.stderr or "ℹ️  Nothing to commit.")
        return

    # Push
    git("push", "origin", branch, check=True)
    print("✅ Pushed to origin; GitHub Action will purge jsDelivr cache.")

def fetch_fundamentals(symbols, yf_session, max_retries=3, retry_delay=5):
    """
    Build ``{SYM: {eps, pe, div_y, sector}}`` from yfinance ``.info``.

    Every symbol is attempted up to ``max_retries`` times, sleeping
    ``retry_delay`` seconds between failed attempts. A symbol whose last
    attempt failed maps to ``{"error": <message>, "sector": ...}`` instead, so
    the failure is visible wherever the returned dict is written out.

    ``pe`` is ``regularMarketPrice / trailingEps`` rounded to 1 decimal, or
    ``None`` when either input is missing or zero. ``div_y`` is
    ``dividendYield * 100`` rounded to 1 decimal.
    NOTE(review): this assumes ``dividendYield`` is reported as a fraction --
    confirm against the installed yfinance version.
    """
    results = {}
    for ticker in symbols:
        print(f"⏳ Fetching fundamentals for {ticker}…")
        failure = None
        for attempt in range(1, max_retries + 1):
            try:
                # .info may be slow / rate-limited
                details = yf.Ticker(ticker, session=yf_session).info
                eps = details.get("trailingEps")
                price = details.get("regularMarketPrice")
                if eps and price:
                    pe = round(price / eps, 1)
                else:
                    pe = None
                dividend = details.get("dividendYield")
                if dividend is not None:
                    dividend = round(dividend * 100, 1)
                results[ticker] = {
                    "eps": eps,
                    "pe": pe,
                    "div_y": dividend,
                    "sector": SECTOR_MAP.get(ticker),
                }
            except Exception as exc:
                failure = str(exc)
                print(f"   ⚠️  Attempt {attempt}/{max_retries} failed: {failure}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
            else:
                # Success: forget errors from earlier attempts and stop retrying.
                failure = None
                break
        if failure:
            # Replace any partial entry with the error of the final attempt.
            results[ticker] = {"error": failure, "sector": SECTOR_MAP.get(ticker)}
        time.sleep(2)  # small spacing to be polite
    return results


def guard_path(path: str):
    """Raise ``RuntimeError`` when the normalized ``path`` contains a forbidden substring."""
    normalized = os.path.normpath(path)
    # Plain substring test against every entry of FORBIDDEN_SUBSTRINGS.
    if any(token in normalized for token in FORBIDDEN_SUBSTRINGS):
        raise RuntimeError(f"Refusing to write into forbidden path: {normalized}")

def safe_mkdirs(path: str):
    """Create ``path`` (parents included) once it has passed ``guard_path``."""
    # The guard runs first so a forbidden location is never created.
    guard_path(path)
    # An already existing directory is not an error.
    os.makedirs(name=path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    """Create and return ``<base_dir>/runs/run_<YYYY-MM-DD>`` for today's date."""
    folder_name = "run_" + datetime.now().strftime("%Y-%m-%d")
    target = os.path.join(base_dir, "runs", folder_name)
    safe_mkdirs(target)
    return target


def ensure_current_dir(base_dir: str) -> str:
    """Create (if needed) and return the ``current_day2`` folder.

    The folder sits directly under ``base_dir``, not inside ``runs/``.
    """
    current_path = os.path.join(base_dir, "current_day2")
    safe_mkdirs(current_path)
    return current_path

def copy_to_current(src_path: str, current_dir: str):
    """Copy ``src_path`` byte-for-byte into ``current_dir`` and return the new path.

    The copy keeps the source file's base name; ``current_dir`` is checked
    with ``guard_path`` before anything is opened.
    """
    guard_path(current_dir)
    file_name = os.path.basename(src_path)
    target = os.path.join(current_dir, file_name)
    with open(src_path, "rb") as reader, open(target, "wb") as writer:
        payload = reader.read()
        writer.write(payload)
    return target



def write_json(path: str, obj):
    """Serialize ``obj`` as 2-space-indented JSON to ``path`` (guarded by ``guard_path``)."""
    guard_path(path)
    with open(path, "w") as handle:
        json.dump(obj, handle, indent=2)

def create_or_update_symlink(target_dir: str, link_path: str):
    """Point ``link_path`` at ``target_dir`` via a symlink (best-effort).

    An existing file or symlink at ``link_path`` is removed first. Failures
    never propagate -- for example on filesystems without symlink support, or
    when ``link_path`` is a real directory -- but they are reported on stdout
    instead of being swallowed silently.

    Args:
        target_dir: Path the link should resolve to.
        link_path: Location of the symlink to create or refresh.
    """
    try:
        # lexists() is true for dangling symlinks too; it is equivalent to
        # "islink(path) or exists(path)".
        if os.path.lexists(link_path):
            os.remove(link_path)
        os.symlink(target_dir, link_path)
    except (OSError, NotImplementedError) as exc:
        # Non-fatal by design, but leave a trace so the stale link is noticed.
        print(f"⚠️  Could not refresh symlink {link_path}: {exc}")

# -------------------- Main --------------------

def main():
    """Fetch prices and fundamentals via yfinance, write JSON outputs, and push.

    Steps, in order:
      1. Resolve the base directory (``DATA_BASE_DIR`` environment variable,
         falling back to ``DEFAULT_BASE_DIR``) and create today's run folder
         and the current folder.
      2. For every symbol in ``TICKERS``, fetch daily closes for the
         ``START``..``END`` window (up to ``MAX_RETRIES`` attempts), write
         ``<symbol>_365d.json`` into the run folder and copy it into the
         current folder.
      3. Fetch fundamentals for ``FUND_TICKERS`` and write
         ``fundamentals.json`` to both folders.
      4. Write ``summary.json`` to both folders, then commit and push.
    """
    # yfinance accepts a curl_cffi session via 'session'
    yf_session = requests.Session(impersonate="chrome124")

    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"📁 Daily archive folder: {run_dir}")
    print(f"📂 Current folder for Qualtrics: {current_dir}")

    window_start = START.strftime("%Y-%m-%d")
    window_end = END.strftime("%Y-%m-%d")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": window_start,
        "window_end": window_end,
        "files": [],
        "errors": [],
    }

    for sym in TICKERS:
        print(f"⏳ Fetching 365-day data for {sym}…")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym, session=yf_session)
                df = tkr.history(
                    start=window_start,
                    end=window_end,
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                # Missing closes arrive as NaN, not None. Drop them, otherwise
                # json.dump emits bare NaN tokens that strict JSON parsers reject.
                closes = df["Close"].dropna() if "Close" in df.columns else None
                if closes is None or closes.empty:
                    raise ValueError("No valid close prices found.")

                # [epoch milliseconds, close rounded to cents]
                pts = [
                    [int(ts.timestamp() * 1000), round(float(price), 2)]
                    for ts, price in closes.items()
                ]

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                # Write to daily run folder
                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                # Also copy to current folder for Qualtrics
                copy_to_current(out_path, current_dir)

                print(f" ✅ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                # Any failure (network, parsing, writing) triggers a retry.
                last_err = str(e)
                print(f"   ⚠️ Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        # Spacing before the next request (the fundamentals fetch follows the last ticker).
        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # --- fundamentals ---
    funds = fetch_fundamentals(FUND_TICKERS, yf_session, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY_SEC)

    # Surface fundamentals failures in summary.json as well, so the final
    # status message cannot claim success while fundamentals.json holds errors.
    for fund_sym, payload in funds.items():
        if "error" in payload:
            run_summary["errors"].append(
                {"symbol": fund_sym, "error": f"fundamentals: {payload['error']}"}
            )

    # save fundamentals.json in both places
    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    # add to summary
    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
    }

    # Save summary to both locations
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # Auto-commit & push (triggers purge workflow)
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("🏁 Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")


if __name__ == "__main__":
    main()

📁 Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-10-20
📂 Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2
⏳ Fetching 365-day data for CSCO…
 ✅ Wrote csco_365d.json (249 points)
⏳ Fetching 365-day data for MSFT…
 ✅ Wrote msft_365d.json (249 points)


KeyboardInterrupt: 