In [2]:
!pip install yfinance


Collecting yfinance
  Downloading yfinance-0.2.66-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting requests>=2.31 (from yfinance)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.7-py3-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.18.3.tar.gz (3.0 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.0/3.0 MB[0m [31m24.6 MB/s[0m  [33m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.t

In [6]:
#!/usr/bin/env python3
import yfinance as yf
import json
import os
import time
from datetime import datetime, timedelta
import subprocess

# ---------------------------- CONFIG ----------------------------

CURRENCIES = {
    "USDEUR=X": "Euro Area",
    "USDGBP=X": "United Kingdom",
    "USDJPY=X": "Japan",
    "USDNZD=X": "New Zealand",
    "USDMXN=X": "Mexico",
    "USDCNY=X": "China",
}

# Big Mac GDP-adjusted PPP misalignment (in %)
PPP_MISALIGN = {
    "USDEUR=X": 15.2,
    "USDGBP=X": 13.5,
    "USDJPY=X": -46.3,
    "USDNZD=X": -14.8,
    "USDMXN=X": -12.2,
    "USDCNY=X": -40.9,
}

# Big Mac GDP-adjusted PPP misalignment (in %)
NAME = {
    "USDEUR=X": "Euro",
    "USDGBP=X": "Pound sterling",
    "USDJPY=X": "Japanese yen",
    "USDNZD=X": "New Zealand dollar",
    "USDMXN=X": "Mexican peso",
    "USDCNY=X": "Chinese yuan",

}

END = datetime.now()
START = END - timedelta(days=365)

DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/d2"

SLEEP_BETWEEN = 8
MAX_RETRIES = 3
RETRY_DELAY = 4

FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]


# ---------------------------- HELPERS ----------------------------

def guard_path(path):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_dir(base):
    d = os.path.join(base, "runs", f"run_{datetime.now().strftime('%Y-%m-%d')}")
    safe_mkdirs(d)
    return d

def ensure_current(base):
    cur = os.path.join(base, "current_fx")
    safe_mkdirs(cur)
    return cur

def write_json(path, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

def copy_to_current(src, current):
    guard_path(current)
    dst = os.path.join(current, os.path.basename(src))
    with open(src, "rb") as s, open(dst, "wb") as d:
        d.write(s.read())
    return dst

def git_commit_and_push(repo_root, run_dir, current_dir, branch="main"):
    cwd = os.getcwd()
    os.chdir(repo_root)
    try:
        diff = subprocess.run(["git", "status", "--porcelain", current_dir, run_dir],
                              capture_output=True, text=True)
        if diff.stdout.strip() == "":
            print("No changes to commit.")
            return

        subprocess.run(["git", "add", current_dir, run_dir], check=True)
        msg = f"FX data update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        subprocess.run(["git", "commit", "-m", msg], check=True)
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("Pushed to GitHub.")
    finally:
        os.chdir(cwd)


# ---------------------------- MAIN ----------------------------

def main():
    base = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base)
    safe_mkdirs(base)

    run_dir = make_daily_dir(base)
    current = ensure_current(base)

    print(f"Run dir: {run_dir}")
    print(f"Current dir: {current}")

    summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    # ------------------ FX PRICE DATA ------------------
    # ------------------ FX PRICE DATA ------------------
    fx_data = {}

    for ticker, country in CURRENCIES.items():
        print(f"Fetching FX data for {ticker}...")

        err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                df = yf.Ticker(ticker).history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df.empty:
                    raise ValueError("Empty history returned")

                prices = [
                    [int(ts.timestamp() * 1000), round(float(row["Close"]), 5)]
                    for ts, row in df.iterrows()
                ]

                if not prices:
                    raise ValueError("No valid prices")

                short = ticker.replace("=X", "").replace("/", "").lower()
                out_path = os.path.join(run_dir, f"{short}_365d.json")

                write_json(out_path, {"prices": prices})
                copy_to_current(out_path, current)

                fx_data[ticker] = {
                    "country": country,
                    "name": NAME[ticker],                 # <-- added
                    "ppp_misalign_percent": PPP_MISALIGN[ticker],
                }

                summary["files"].append({
                    "ticker": ticker,
                    "file": out_path,
                    "points": len(prices),
                })

                print(f"  ‚úî wrote {short}_365d.json ({len(prices)} pts)")
                err = None
                break

            except Exception as e:
                err = str(e)
                print(f"  ‚ö† attempt {attempt}: {err}")
                time.sleep(RETRY_DELAY)

        if err:
            summary["errors"].append({"ticker": ticker, "error": err})

        time.sleep(SLEEP_BETWEEN)

    # ------------------ FUNDAMENTALS ------------------
    fundamentals_path = os.path.join(run_dir, "fundamentals.json")
    fundamentals_cur = os.path.join(current, "fundamentals.json")

    write_json(fundamentals_path, fx_data)
    write_json(fundamentals_cur, fx_data)

    summary["fundamentals_file"] = fundamentals_cur
    summary_path = os.path.join(run_dir, "summary.json")
    write_json(summary_path, summary)
    write_json(os.path.join(current, "summary.json"), summary)

    # Git commit
    git_commit_and_push(base, run_dir, current)

    print("Done.")


if __name__ == "__main__":
    main()


Run dir: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/d2/runs/run_2025-12-13
Current dir: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/d2/current_fx
Fetching FX data for USDEUR=X...
  ‚úî wrote usdeur_365d.json (258 pts)
Fetching FX data for USDGBP=X...
  ‚úî wrote usdgbp_365d.json (258 pts)
Fetching FX data for USDJPY=X...
  ‚úî wrote usdjpy_365d.json (258 pts)
Fetching FX data for USDNZD=X...
  ‚úî wrote usdnzd_365d.json (258 pts)
Fetching FX data for USDMXN=X...
  ‚úî wrote usdmxn_365d.json (258 pts)
Fetching FX data for USDCNY=X...
  ‚úî wrote usdcny_365d.json (258 pts)
[main 1b72306] FX data update: 2025-12-13 13:27:52
 16 files changed, 12574 insertions(+)
 create mode 100644 d2/current_fx/fundamentals.json
 create mode 100644 d2/current_fx/summary.json
 create mode 100644 d2/current_fx/usdcny_365d.json
 create mode 100644 d2/current_fx/usdeur_365d.json
 creat

To github.com:pagrass/pilot1-asset-data.git
   93cf2e9..1b72306  main -> main


In [11]:
#Code Cryptos

import csv
import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance
import yfinance as yf

# -------------------- Config --------------------

#STOCKS  = ["CSCO", "MSFT", "TER", "AKAM", "NTAP", "AMKR"]

#Replication cryptos


#CRYPTOS = ["BCH-USD", "ETH-USD", "XMR-USD"] 

#Own predictions cryptos
#CRYPTOS = ["XRP-USD", "BNB-USD", "BTC-USD"]

#All cryptos
CRYPTOS = ["BTC-USD", "XMR-USD", "BNB-USD", "BCH-USD", "ETH-USD", "TRX-USD"]

TICKERS = CRYPTOS
#+ CRYPTOS

FUND_TICKERS = ["CSCO", "MSFT", "TER", "AKAM", "NTAP", "AMKR"]

SECTOR_MAP = {
    "CSCO": "Information Technology",
    "MSFT": "Information Technology",
    "TER": "Information Technology",
    "AKAM": "Information Technology",
    "NTAP":  "Information Technology",
    "AMKR":   "Information Technology",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/cryptos"

# Path to CSV ‚Äúdictionary‚Äù
CSV_METRICS_PATH = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Model Spillovers/Data/Stock Selection/preselection/candidate_subset_all.csv"

# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir

def ensure_current_dir(base_dir: str) -> str:
    cur = os.path.join(base_dir, "current")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path

def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

# ---------- CSV metrics ----------

def _coerce_number(val):
    """Coerce CSV field to float if possible; return None for empty/invalid."""
    if val is None:
        return None
    s = str(val).strip()
    if s == "" or s.lower() in {"na", "nan", "none"}:
        return None
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def fetch_metrics_from_csv(symbols, csv_path, sector_map=None):
    """
    Pull marketcap, pb_current, pb_current_pctile, div_y from a CSV keyed by ticker.

    Post-processing:
      - marketcap -> divide by 1,000,000 and round to 2 decimals (millions)
      - pb_current_pctile -> round to 2 decimals
      - div_y (aka div_yield/dividend_yield) -> divide by 100 (to decimal)
      - valuation (new): Low / Mid / High based on pb_current_pctile
    """
    lookup = {}
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_lower = {k.lower(): v for k, v in row.items()}
            tk = (row_lower.get("ticker") or row_lower.get("symbol") or "").strip().upper()
            if tk:
                lookup[tk] = row_lower

    print("Has MSFT exact?:", "MSFT" in lookup)

    # show any keys that "contain" MSFT (suffixes, weird formats)
    msft_like = [k for k in lookup.keys() if "MSFT" in k]
    print("MSFT-like keys:", msft_like[:50])
    out = {}
    for sym in symbols:
        key = sym.upper()
        row = lookup.get(key)
        if not row:
            out[key] = {"error": "Ticker not found in CSV", "sector": (sector_map or {}).get(key)}
            continue

        mc_raw = _coerce_number(row.get("marketcap"))
        pb = _coerce_number(row.get("pb_current"))
        pb_pct_raw = _coerce_number(row.get("pb_current_pctile"))

        # Flexible dividend yield column handling
        div_candidates = ["div_y", "div_yield", "dividend_yield"]
        div_raw = None
        for c in div_candidates:
            if c in row:
                div_raw = _coerce_number(row.get(c))
                if div_raw is not None:
                    break

        # ---- Post-processing transforms ----
        mc_millions = round(mc_raw / 1_000_000, 2) if mc_raw is not None else None
        pb_pct = round(pb_pct_raw * 100, 0) if pb_pct_raw is not None else None
        div_val = (div_raw) if div_raw is not None else None

        # ---- Valuation classification ----
        if pb_pct is None:
            valuation = None
        elif pb_pct <= 30:
            valuation = "Low"
        elif pb_pct >= 70:
            valuation = "High"
        else:
            valuation = "Mid"

        out[key] = {
            "marketcap": mc_millions,
            "pb_current": pb,
            "pb_current_pctile": pb_pct,
            "div_y": div_val,
            "valuation": valuation,   # üëà new field
        }
        if sector_map:
            out[key]["sector"] = sector_map.get(key)

    return out


# -------------------- Main --------------------

def main():
    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    # ---- Price history (yfinance) ----
    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # ---- Metrics from CSV (no Yahoo fundamentals) ----
    try:
        funds = fetch_metrics_from_csv(
            FUND_TICKERS,
            CSV_METRICS_PATH,
            sector_map=SECTOR_MAP
        )
    except Exception as e:
        funds = {sym: {"error": f"CSV read failed: {e}", "sector": SECTOR_MAP.get(sym)} for sym in FUND_TICKERS}

    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
        "source": "csv",
        "csv_path": CSV_METRICS_PATH,
        "fields": ["marketcap", "pb_current", "pb_current_pctile", "div_y"]
    }

    # ---- Save summary ----
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # ---- Auto-commit & push ----
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()


üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/cryptos/runs/run_2025-12-18
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/cryptos/current
‚è≥ Fetching 365-day data for BTC-USD‚Ä¶
 ‚úÖ Wrote btc_365d.json (365 points)
‚è≥ Fetching 365-day data for XMR-USD‚Ä¶
 ‚úÖ Wrote xmr_365d.json (365 points)
‚è≥ Fetching 365-day data for BNB-USD‚Ä¶
 ‚úÖ Wrote bnb_365d.json (365 points)
‚è≥ Fetching 365-day data for BCH-USD‚Ä¶
 ‚úÖ Wrote bch_365d.json (365 points)
‚è≥ Fetching 365-day data for ETH-USD‚Ä¶
 ‚úÖ Wrote eth_365d.json (365 points)
‚è≥ Fetching 365-day data for TRX-USD‚Ä¶
 ‚úÖ Wrote trx_365d.json (365 points)
Has MSFT exact?: True
MSFT-like keys: ['MSFT']
‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.
üèÅ Done.
All symbols fetched successfully.
Latest 'current' folder ready for Qualtrics

To github.com:pagrass/pilot1-asset-data.git
   60e1150..dd69c46  main -> main


In [12]:
#Code Stocks

import csv
import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance
import yfinance as yf

# -------------------- Config --------------------

STOCKS  = ["CSCO", "MSFT", "TER", "AKAM", "NTAP", "AMKR"]

#Replication cryptos


#CRYPTOS = ["BCH-USD", "ETH-USD", "XMR-USD"] 

#Own predictions cryptos
#CRYPTOS = ["XRP-USD", "BNB-USD", "BTC-USD"]

#All cryptos
#CRYPTOS = ["BTC-USD", "XMR-USD", "BNB-USD", "BCH-USD", "ETH-USD", "TRX-USD"]

TICKERS = STOCKS
#+ CRYPTOS

FUND_TICKERS = ["CSCO", "MSFT", "TER", "AKAM", "NTAP", "AMKR"]

SECTOR_MAP = {
    "CSCO": "Information Technology",
    "MSFT": "Information Technology",
    "TER": "Information Technology",
    "AKAM": "Information Technology",
    "NTAP":  "Information Technology",
    "AMKR":   "Information Technology",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finaltest/stocks"

# Path to CSV ‚Äúdictionary‚Äù
CSV_METRICS_PATH = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Model Spillovers/Data/Stock Selection/preselection/candidate_subset_all.csv"

# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir

def ensure_current_dir(base_dir: str) -> str:
    cur = os.path.join(base_dir, "current")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path

def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

# ---------- CSV metrics ----------

def _coerce_number(val):
    """Coerce CSV field to float if possible; return None for empty/invalid."""
    if val is None:
        return None
    s = str(val).strip()
    if s == "" or s.lower() in {"na", "nan", "none"}:
        return None
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def fetch_metrics_from_csv(symbols, csv_path, sector_map=None):
    """
    Pull marketcap, pb_current, pb_current_pctile, div_y from a CSV keyed by ticker.

    Post-processing:
      - marketcap -> divide by 1,000,000 and round to 2 decimals (millions)
      - pb_current_pctile -> round to 2 decimals
      - div_y (aka div_yield/dividend_yield) -> divide by 100 (to decimal)
      - valuation (new): Low / Mid / High based on pb_current_pctile
    """
    lookup = {}
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_lower = {k.lower(): v for k, v in row.items()}
            tk = (row_lower.get("ticker") or row_lower.get("symbol") or "").strip().upper()
            if tk:
                lookup[tk] = row_lower

    print("Has MSFT exact?:", "MSFT" in lookup)

    # show any keys that "contain" MSFT (suffixes, weird formats)
    msft_like = [k for k in lookup.keys() if "MSFT" in k]
    print("MSFT-like keys:", msft_like[:50])
    out = {}
    for sym in symbols:
        key = sym.upper()
        row = lookup.get(key)
        if not row:
            out[key] = {"error": "Ticker not found in CSV", "sector": (sector_map or {}).get(key)}
            continue

        mc_raw = _coerce_number(row.get("marketcap"))
        pb = _coerce_number(row.get("pb_current"))
        pb_pct_raw = _coerce_number(row.get("pb_current_pctile"))

        # Flexible dividend yield column handling
        div_candidates = ["div_y", "div_yield", "dividend_yield"]
        div_raw = None
        for c in div_candidates:
            if c in row:
                div_raw = _coerce_number(row.get(c))
                if div_raw is not None:
                    break

        # ---- Post-processing transforms ----
        mc_millions = round(mc_raw / 1_000_000, 2) if mc_raw is not None else None
        pb_pct = round(pb_pct_raw * 100, 0) if pb_pct_raw is not None else None
        div_val = (div_raw) if div_raw is not None else None

        # ---- Valuation classification ----
        if pb_pct is None:
            valuation = None
        elif pb_pct <= 30:
            valuation = "Low"
        elif pb_pct >= 70:
            valuation = "High"
        else:
            valuation = "Mid"

        out[key] = {
            "marketcap": mc_millions,
            "pb_current": pb,
            "pb_current_pctile": pb_pct,
            "div_y": div_val,
            "valuation": valuation,   # üëà new field
        }
        if sector_map:
            out[key]["sector"] = sector_map.get(key)

    return out


# -------------------- Main --------------------

def main():
    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    # ---- Price history (yfinance) ----
    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # ---- Metrics from CSV (no Yahoo fundamentals) ----
    try:
        funds = fetch_metrics_from_csv(
            FUND_TICKERS,
            CSV_METRICS_PATH,
            sector_map=SECTOR_MAP
        )
    except Exception as e:
        funds = {sym: {"error": f"CSV read failed: {e}", "sector": SECTOR_MAP.get(sym)} for sym in FUND_TICKERS}

    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
        "source": "csv",
        "csv_path": CSV_METRICS_PATH,
        "fields": ["marketcap", "pb_current", "pb_current_pctile", "div_y"]
    }

    # ---- Save summary ----
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # ---- Auto-commit & push ----
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()


üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finaltest/stocks/runs/run_2025-12-18
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finaltest/stocks/current
‚è≥ Fetching 365-day data for CSCO‚Ä¶
 ‚úÖ Wrote csco_365d.json (250 points)
‚è≥ Fetching 365-day data for MSFT‚Ä¶
 ‚úÖ Wrote msft_365d.json (250 points)
‚è≥ Fetching 365-day data for TER‚Ä¶
 ‚úÖ Wrote ter_365d.json (250 points)
‚è≥ Fetching 365-day data for AKAM‚Ä¶
 ‚úÖ Wrote akam_365d.json (250 points)
‚è≥ Fetching 365-day data for NTAP‚Ä¶
 ‚úÖ Wrote ntap_365d.json (250 points)
‚è≥ Fetching 365-day data for AMKR‚Ä¶
 ‚úÖ Wrote amkr_365d.json (250 points)
Has MSFT exact?: True
MSFT-like keys: ['MSFT']
‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.
üèÅ Done.
All symbols fetched successfully.
Latest 'current' folder ready for Qualtrics: /Use

To github.com:pagrass/pilot1-asset-data.git
   dd69c46..f4035fc  main -> main


In [None]:
#CODE Own Belief Cryptos
import csv
import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance
import yfinance as yf

# -------------------- Config --------------------

#STOCKS  = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

#Replication cryptos


#CRYPTOS = ["BCH-USD", "ETH-USD", "XMR-USD"] 

#Own predictions cryptos
#CRYPTOS = ["XRP-USD", "BNB-USD", "BTC-USD"]

#All cryptos
CRYPTOS = ["ETH-USD", "XMR-USD", "BCH-USD", ""]

TICKERS = CRYPTOS
#+ CRYPTOS

FUND_TICKERS = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

SECTOR_MAP = {
    "CSCO": "Information Technology",
    "TMUS": "Communication Services",
    "TWLO": "Information Technology",
    "PEGA": "Information Technology",
    "ROG":  "Information Technology",
    "PD":   "Information Technology",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/replicationcryptos"

# Path to CSV ‚Äúdictionary‚Äù
CSV_METRICS_PATH = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Model Spillovers/Data/Stock Selection/preselection/candidate_subset_all.csv"

# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir

def ensure_current_dir(base_dir: str) -> str:
    cur = os.path.join(base_dir, "current")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path

def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

# ---------- CSV metrics ----------

def _coerce_number(val):
    """Coerce CSV field to float if possible; return None for empty/invalid."""
    if val is None:
        return None
    s = str(val).strip()
    if s == "" or s.lower() in {"na", "nan", "none"}:
        return None
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def fetch_metrics_from_csv(symbols, csv_path, sector_map=None):
    """
    Pull marketcap, pb_current, pb_current_pctile, div_y from a CSV keyed by ticker.

    Post-processing:
      - marketcap -> divide by 1,000,000 and round to 2 decimals (millions)
      - pb_current_pctile -> round to 2 decimals
      - div_y (aka div_yield/dividend_yield) -> divide by 100 (to decimal)
      - valuation (new): Low / Mid / High based on pb_current_pctile
    """
    lookup = {}
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_lower = {k.lower(): v for k, v in row.items()}
            tk = (row_lower.get("ticker") or row_lower.get("symbol") or "").strip().upper()
            if tk:
                lookup[tk] = row_lower

    out = {}
    for sym in symbols:
        key = sym.upper()
        row = lookup.get(key)
        if not row:
            out[key] = {"error": "Ticker not found in CSV", "sector": (sector_map or {}).get(key)}
            continue

        mc_raw = _coerce_number(row.get("marketcap"))
        pb = _coerce_number(row.get("pb_current"))
        pb_pct_raw = _coerce_number(row.get("pb_current_pctile"))

        # Flexible dividend yield column handling
        div_candidates = ["div_y", "div_yield", "dividend_yield"]
        div_raw = None
        for c in div_candidates:
            if c in row:
                div_raw = _coerce_number(row.get(c))
                if div_raw is not None:
                    break

        # ---- Post-processing transforms ----
        mc_millions = round(mc_raw / 1_000_000, 2) if mc_raw is not None else None
        pb_pct = round(pb_pct_raw * 100, 0) if pb_pct_raw is not None else None
        div_val = (div_raw) if div_raw is not None else None

        # ---- Valuation classification ----
        if pb_pct is None:
            valuation = None
        elif pb_pct <= 33:
            valuation = "Low"
        elif pb_pct >= 67:
            valuation = "High"
        else:
            valuation = "Mid"

        out[key] = {
            "marketcap": mc_millions,
            "pb_current": pb,
            "pb_current_pctile": pb_pct,
            "div_y": div_val,
            "valuation": valuation,   # üëà new field
        }
        if sector_map:
            out[key]["sector"] = sector_map.get(key)

    return out


# -------------------- Main --------------------

def main():
    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    # ---- Price history (yfinance) ----
    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # ---- Metrics from CSV (no Yahoo fundamentals) ----
    try:
        funds = fetch_metrics_from_csv(
            FUND_TICKERS,
            CSV_METRICS_PATH,
            sector_map=SECTOR_MAP
        )
    except Exception as e:
        funds = {sym: {"error": f"CSV read failed: {e}", "sector": SECTOR_MAP.get(sym)} for sym in FUND_TICKERS}

    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
        "source": "csv",
        "csv_path": CSV_METRICS_PATH,
        "fields": ["marketcap", "pb_current", "pb_current_pctile", "div_y"]
    }

    # ---- Save summary ----
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # ---- Auto-commit & push ----
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()


üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/replicationcryptos/runs/run_2025-12-17
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/replicationcryptos/current
‚è≥ Fetching 365-day data for ETH-USD‚Ä¶
 ‚úÖ Wrote eth_365d.json (365 points)
‚è≥ Fetching 365-day data for XMR-USD‚Ä¶
 ‚úÖ Wrote xmr_365d.json (365 points)
‚è≥ Fetching 365-day data for BCH-USD‚Ä¶
 ‚úÖ Wrote bch_365d.json (365 points)
‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.
üèÅ Done.
All symbols fetched successfully.
Latest 'current' folder ready for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/main/replicationcryptos/current


To github.com:pagrass/pilot1-asset-data.git
   d0dca68..60e1150  main -> main


In [None]:
#CODE Own Belief Cryptos
import csv
import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance
import yfinance as yf

# -------------------- Config --------------------

#STOCKS  = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

#Replication cryptos


#CRYPTOS = ["BCH-USD", "ETH-USD", "XMR-USD"] 

#Own predictions cryptos
#CRYPTOS = ["XRP-USD", "BNB-USD", "BTC-USD"]

#All cryptos
CRYPTOS = ["BTC-USD", "XMR-USD", "BNB-USD", "BCH-USD", "ETH-USD", "TRX-USD"]

TICKERS = CRYPTOS
#+ CRYPTOS

FUND_TICKERS = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

SECTOR_MAP = {
    "CSCO": "Information Technology",
    "TMUS": "Communication Services",
    "TWLO": "Information Technology",
    "PEGA": "Information Technology",
    "ROG":  "Information Technology",
    "PD":   "Information Technology",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finalpilot/crypto"

# Path to CSV ‚Äúdictionary‚Äù
CSV_METRICS_PATH = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Model Spillovers/Data/Stock Selection/preselection/candidate_subset_all.csv"

# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir

def ensure_current_dir(base_dir: str) -> str:
    cur = os.path.join(base_dir, "current")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path

def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

# ---------- CSV metrics ----------

def _coerce_number(val):
    """Coerce CSV field to float if possible; return None for empty/invalid."""
    if val is None:
        return None
    s = str(val).strip()
    if s == "" or s.lower() in {"na", "nan", "none"}:
        return None
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def fetch_metrics_from_csv(symbols, csv_path, sector_map=None):
    """
    Pull marketcap, pb_current, pb_current_pctile, div_y from a CSV keyed by ticker.

    Post-processing:
      - marketcap -> divide by 1,000,000 and round to 2 decimals (millions)
      - pb_current_pctile -> round to 2 decimals
      - div_y (aka div_yield/dividend_yield) -> divide by 100 (to decimal)
      - valuation (new): Low / Mid / High based on pb_current_pctile
    """
    lookup = {}
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_lower = {k.lower(): v for k, v in row.items()}
            tk = (row_lower.get("ticker") or row_lower.get("symbol") or "").strip().upper()
            if tk:
                lookup[tk] = row_lower

    out = {}
    for sym in symbols:
        key = sym.upper()
        row = lookup.get(key)
        if not row:
            out[key] = {"error": "Ticker not found in CSV", "sector": (sector_map or {}).get(key)}
            continue

        mc_raw = _coerce_number(row.get("marketcap"))
        pb = _coerce_number(row.get("pb_current"))
        pb_pct_raw = _coerce_number(row.get("pb_current_pctile"))

        # Flexible dividend yield column handling
        div_candidates = ["div_y", "div_yield", "dividend_yield"]
        div_raw = None
        for c in div_candidates:
            if c in row:
                div_raw = _coerce_number(row.get(c))
                if div_raw is not None:
                    break

        # ---- Post-processing transforms ----
        mc_millions = round(mc_raw / 1_000_000, 2) if mc_raw is not None else None
        pb_pct = round(pb_pct_raw * 100, 0) if pb_pct_raw is not None else None
        div_val = (div_raw) if div_raw is not None else None

        # ---- Valuation classification ----
        if pb_pct is None:
            valuation = None
        elif pb_pct <= 33:
            valuation = "Low"
        elif pb_pct >= 67:
            valuation = "High"
        else:
            valuation = "Mid"

        out[key] = {
            "marketcap": mc_millions,
            "pb_current": pb,
            "pb_current_pctile": pb_pct,
            "div_y": div_val,
            "valuation": valuation,   # üëà new field
        }
        if sector_map:
            out[key]["sector"] = sector_map.get(key)

    return out


# -------------------- Main --------------------

def main():
    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    # ---- Price history (yfinance) ----
    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)

    # ---- Metrics from CSV (no Yahoo fundamentals) ----
    try:
        funds = fetch_metrics_from_csv(
            FUND_TICKERS,
            CSV_METRICS_PATH,
            sector_map=SECTOR_MAP
        )
    except Exception as e:
        funds = {sym: {"error": f"CSV read failed: {e}", "sector": SECTOR_MAP.get(sym)} for sym in FUND_TICKERS}

    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
        "source": "csv",
        "csv_path": CSV_METRICS_PATH,
        "fields": ["marketcap", "pb_current", "pb_current_pctile", "div_y"]
    }

    # ---- Save summary ----
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # ---- Auto-commit & push ----
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )

    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()


üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finalpilot/crypto/runs/run_2025-12-17
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/finalpilot/crypto/current
‚è≥ Fetching 365-day data for BTC-USD‚Ä¶
 ‚úÖ Wrote btc_365d.json (365 points)
‚è≥ Fetching 365-day data for XMR-USD‚Ä¶
 ‚úÖ Wrote xmr_365d.json (365 points)
‚è≥ Fetching 365-day data for BNB-USD‚Ä¶
 ‚úÖ Wrote bnb_365d.json (365 points)
‚è≥ Fetching 365-day data for BCH-USD‚Ä¶
 ‚úÖ Wrote bch_365d.json (365 points)
‚è≥ Fetching 365-day data for ETH-USD‚Ä¶
 ‚úÖ Wrote eth_365d.json (365 points)
‚è≥ Fetching 365-day data for TRX-USD‚Ä¶
 ‚úÖ Wrote trx_365d.json (365 points)
‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.
üèÅ Done.
All symbols fetched successfully.
Latest 'current' folder ready for Qualtrics: /Users/paulgrass/Library/Mobile Doc

To github.com:pagrass/pilot1-asset-data.git
   41c355a..5045671  main -> main


In [None]:
#!/usr/bin/env python3

import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance curl_cffi
from curl_cffi import requests
import yfinance as yf

# -------------------- Config --------------------

# Stocks & cryptos
STOCKS  = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

#CRYPTOS = ["BTC-USD", "ETH-USD", "XMR-USD", "APT21794-USD", "QNT-USD", "TON11419-USD", "DOT-USD"]
TICKERS = STOCKS 
#+ CRYPTOS

FUND_TICKERS = ["CSCO", "TMUS", "TWLO", "PEGA", "ROG", "PD"]

SECTOR_MAP = {
    "CSCO": "Information Technology",
    "TMUS": "Communication Services",
    "TWLO": "Information Technology",
    "PEGA": "Information Technology",
    "ROG": "Information Technology",
    "PD": "Information Technologys",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot4-asset-data"


# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot3-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    # Make sure we're in the repo root so git paths work
    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        # Check if there are changes to these paths
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Ensure basic identity is set (won't override if already set)
        subprocess.run(["git", "config", "--get", "user.email"], check=False)
        subprocess.run(["git", "config", "--get", "user.name"], check=False)

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            # Likely "nothing to commit" race; bail quietly
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def fetch_fundamentals(symbols, yf_session, max_retries=3, retry_delay=5):
    """
    Returns a dict {SYM: {eps, pe, div_y, sector}}.
    Uses yfinance .info (can be slow/flaky; retried for robustness).
    """
    out = {}
    for sym in symbols:
        print(f"‚è≥ Fetching fundamentals for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, max_retries + 1):
            try:
                t = yf.Ticker(sym, session=yf_session)
                info = t.info  # may be slow / rate-limited
                eps = info.get("trailingEps")
                price = info.get("regularMarketPrice")
                pe = round(price / eps, 1) if eps and price else None
                dy = info.get("dividendYield")
                if dy is not None:
                    dy = round(dy * 100, 1)
                out[sym] = {
                    "eps": eps,
                    "pe": pe,
                    "div_y": dy,
                    "sector": SECTOR_MAP.get(sym),
                }
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è  Attempt {attempt}/{max_retries} failed: {last_err}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
        if last_err:
            # record partial error so you see it in summary.json
            out[sym] = {"error": last_err, "sector": SECTOR_MAP.get(sym)}
        time.sleep(2)  # small spacing to be polite
    return out


def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir


def ensure_current_dir(base_dir: str) -> str:
    # Put current/ directly under base_dir (not inside runs/)
    cur = os.path.join(base_dir, "current_day2")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path



def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

def create_or_update_symlink(target_dir: str, link_path: str):
    # Create/refresh a 'latest' symlink for convenience (best-effort)
    try:
        if os.path.islink(link_path) or os.path.exists(link_path):
            os.remove(link_path)
        os.symlink(target_dir, link_path)
    except Exception:
        # Non-fatal if symlink fails (e.g., on certain filesystems)
        pass

# -------------------- Main --------------------

def main():
    session = requests.Session(impersonate="chrome124")
    yf_session = session  # yfinance accepts a curl_cffi session via 'session'

    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym, session=yf_session)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                # Write to daily run folder
                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                # Also copy to current folder for Qualtrics
                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)
    # --- fundamentals ---
    funds = fetch_fundamentals(FUND_TICKERS, yf_session, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY_SEC)

    # save fundamentals.json in both places
    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    # add to summary
    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
    }

    # Save summary to both locations
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # Auto-commit & push (triggers purge workflow)
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )


    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()

üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-08-19
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2
‚è≥ Fetching 365-day data for BTC-USD‚Ä¶
 ‚úÖ Wrote btc_365d.json (365 points)
‚è≥ Fetching 365-day data for ETH-USD‚Ä¶
 ‚úÖ Wrote eth_365d.json (365 points)
‚è≥ Fetching 365-day data for XMR-USD‚Ä¶
 ‚úÖ Wrote xmr_365d.json (365 points)
‚è≥ Fetching 365-day data for APT21794-USD‚Ä¶
 ‚úÖ Wrote apt21794_365d.json (365 points)
‚è≥ Fetching 365-day data for QNT-USD‚Ä¶
 ‚úÖ Wrote qnt_365d.json (365 points)
‚è≥ Fetching 365-day data for TON11419-USD‚Ä¶
 ‚úÖ Wrote ton11419_365d.json (365 points)
‚è≥ Fetching 365-day data for DOT-USD‚Ä¶
 ‚úÖ Wrote dot_365d.json (365 points)
‚è≥ Fetching fundamentals for DOCN‚Ä¶
‚è≥ Fetching fundamentals for MSFT‚Ä¶
‚è≥ Fetching fundamentals for VZ‚Ä¶
‚è≥ Fetc

To github.com:pagrass/pilot1-asset-data.git
   79058b4..ba00fc3  main -> main


In [None]:
#!/usr/bin/env python3

import subprocess
import json
import os
import time
from datetime import datetime, timedelta

# Deps: pip install yfinance curl_cffi
from curl_cffi import requests
import yfinance as yf

# -------------------- Config --------------------

# Stocks & cryptos
STOCKS  = ["CSCO", "ARLO", "CMPO", "ZS", "KO", "MCD"]
#STOCKS  = []

#CRYPTOS = ["BTC-USD", "ETH-USD", "XMR-USD", "APT21794-USD", "QNT-USD", "TON11419-USD", "DOT-USD"]
CRYPTOS = []
TICKERS = STOCKS + CRYPTOS

FUND_TICKERS = ["DOCN", "ARLO", "CMPO", "ZS", "KO", "MCD"]

SECTOR_MAP = {
    "DOCN": "Technology",
    "MSFT": "Technology",
    "VZ":  "Communication Services",
    "ZS":  "Technology",
    "UFPI": "Basic Materials",
    "DY":   "Industrials",
}

# Date window
END   = datetime.now()
START = END - timedelta(days=365)

# Default base dir (each run gets a new subfolder here)
DEFAULT_BASE_DIR = "/Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data"


# Rate limiting
SLEEP_BETWEEN_TICKERS_SEC = 10
MAX_RETRIES = 3
RETRY_DELAY_SEC = 5

# Safety: never allow writing into these substrings
FORBIDDEN_SUBSTRINGS = ["pilot2-asset-data"]

# -------------------- Helpers --------------------

def git_commit_and_push(repo_root: str, run_dir: str, current_dir: str, branch: str = "main"):
    # Only commit if there are changes in current/ or today's run folder
    rel_run = os.path.relpath(run_dir, repo_root)
    rel_cur = os.path.relpath(current_dir, repo_root)

    # Make sure we're in the repo root so git paths work
    cwd_before = os.getcwd()
    os.chdir(repo_root)
    try:
        # Check if there are changes to these paths
        diff = subprocess.run(
            ["git", "status", "--porcelain", rel_cur, rel_run],
            capture_output=True, text=True
        )
        if diff.returncode != 0:
            print("‚ö†Ô∏è  git status failed; not pushing.")
            return
        if diff.stdout.strip() == "":
            print("‚ÑπÔ∏è  No changes to commit; skipping push.")
            return

        # Ensure basic identity is set (won't override if already set)
        subprocess.run(["git", "config", "--get", "user.email"], check=False)
        subprocess.run(["git", "config", "--get", "user.name"], check=False)

        # Stage just what we care about
        subprocess.run(["git", "add", rel_cur, rel_run], check=True)

        # Commit
        msg = f"Update data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        commit = subprocess.run(["git", "commit", "-m", msg], capture_output=True, text=True)
        if commit.returncode != 0:
            # Likely "nothing to commit" race; bail quietly
            print(commit.stdout or commit.stderr or "‚ÑπÔ∏è  Nothing to commit.")
            return

        # Push
        subprocess.run(["git", "push", "origin", branch], check=True)
        print("‚úÖ Pushed to origin; GitHub Action will purge jsDelivr cache.")
    finally:
        os.chdir(cwd_before)

def fetch_fundamentals(symbols, yf_session, max_retries=3, retry_delay=5):
    """
    Returns a dict {SYM: {eps, pe, div_y, sector}}.
    Uses yfinance .info (can be slow/flaky; retried for robustness).
    """
    out = {}
    for sym in symbols:
        print(f"‚è≥ Fetching fundamentals for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, max_retries + 1):
            try:
                t = yf.Ticker(sym, session=yf_session)
                info = t.info  # may be slow / rate-limited
                eps = info.get("trailingEps")
                price = info.get("regularMarketPrice")
                pe = round(price / eps, 1) if eps and price else None
                dy = info.get("dividendYield")
                if dy is not None:
                    dy = round(dy * 100, 1)
                out[sym] = {
                    "eps": eps,
                    "pe": pe,
                    "div_y": dy,
                    "sector": SECTOR_MAP.get(sym),
                }
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è  Attempt {attempt}/{max_retries} failed: {last_err}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
        if last_err:
            # record partial error so you see it in summary.json
            out[sym] = {"error": last_err, "sector": SECTOR_MAP.get(sym)}
        time.sleep(2)  # small spacing to be polite
    return out


def guard_path(path: str):
    norm = os.path.normpath(path)
    for bad in FORBIDDEN_SUBSTRINGS:
        if bad in norm:
            raise RuntimeError(f"Refusing to write into forbidden path: {norm}")

def safe_mkdirs(path: str):
    guard_path(path)
    os.makedirs(path, exist_ok=True)

def make_daily_run_dir(base_dir: str) -> str:
    date_str = datetime.now().strftime("%Y-%m-%d")
    run_dir = os.path.join(base_dir, "runs", f"run_{date_str}")
    safe_mkdirs(run_dir)
    return run_dir


def ensure_current_dir(base_dir: str) -> str:
    # Put current/ directly under base_dir (not inside runs/)
    cur = os.path.join(base_dir, "current_day2")
    safe_mkdirs(cur)
    return cur

def copy_to_current(src_path: str, current_dir: str):
    guard_path(current_dir)
    dst_path = os.path.join(current_dir, os.path.basename(src_path))
    with open(src_path, "rb") as s, open(dst_path, "wb") as d:
        d.write(s.read())
    return dst_path



def write_json(path: str, obj):
    guard_path(path)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

def create_or_update_symlink(target_dir: str, link_path: str):
    # Create/refresh a 'latest' symlink for convenience (best-effort)
    try:
        if os.path.islink(link_path) or os.path.exists(link_path):
            os.remove(link_path)
        os.symlink(target_dir, link_path)
    except Exception:
        # Non-fatal if symlink fails (e.g., on certain filesystems)
        pass

# -------------------- Main --------------------

def main():
    session = requests.Session(impersonate="chrome124")
    yf_session = session  # yfinance accepts a curl_cffi session via 'session'

    base_dir = os.getenv("DATA_BASE_DIR", DEFAULT_BASE_DIR)
    guard_path(base_dir)
    safe_mkdirs(base_dir)

    run_dir = make_daily_run_dir(base_dir)
    current_dir = ensure_current_dir(base_dir)

    print(f"üìÅ Daily archive folder: {run_dir}")
    print(f"üìÇ Current folder for Qualtrics: {current_dir}")

    run_summary = {
        "started_at": datetime.now().isoformat(timespec="seconds"),
        "window_start": START.strftime("%Y-%m-%d"),
        "window_end": END.strftime("%Y-%m-%d"),
        "files": [],
        "errors": [],
    }

    for sym in TICKERS:
        print(f"‚è≥ Fetching 365-day data for {sym}‚Ä¶")
        last_err = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                tkr = yf.Ticker(sym, session=yf_session)
                df = tkr.history(
                    start=START.strftime("%Y-%m-%d"),
                    end=END.strftime("%Y-%m-%d"),
                    auto_adjust=True,
                )
                if df is None or df.empty:
                    raise ValueError("Empty dataframe returned.")

                pts = [
                    [int(row_ts.timestamp() * 1000), round(float(row["Close"]), 2)]
                    for row_ts, row in df.iterrows()
                    if row.get("Close") is not None
                ]
                if not pts:
                    raise ValueError("No valid close prices found.")

                out_sym = sym.replace("-USD", "").replace(".", "").lower()

                # Write to daily run folder
                out_path = os.path.join(run_dir, f"{out_sym}_365d.json")
                write_json(out_path, {"prices": pts})

                # Also copy to current folder for Qualtrics
                copy_to_current(out_path, current_dir)

                print(f" ‚úÖ Wrote {out_sym}_365d.json ({len(pts)} points)")

                run_summary["files"].append({
                    "symbol": sym,
                    "file": out_path,
                    "points": len(pts),
                    "first_ts": pts[0][0],
                    "last_ts": pts[-1][0],
                })
                last_err = None
                break
            except Exception as e:
                last_err = str(e)
                print(f"   ‚ö†Ô∏è Attempt {attempt}/{MAX_RETRIES} failed: {last_err}")
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_SEC)

        if last_err:
            run_summary["errors"].append({"symbol": sym, "error": last_err})

        time.sleep(SLEEP_BETWEEN_TICKERS_SEC)
    # --- fundamentals ---
    funds = fetch_fundamentals(FUND_TICKERS, yf_session, max_retries=MAX_RETRIES, retry_delay=RETRY_DELAY_SEC)

    # save fundamentals.json in both places
    funds_run_path = os.path.join(run_dir, "fundamentals.json")
    funds_cur_path = os.path.join(current_dir, "fundamentals.json")
    write_json(funds_run_path, funds)
    write_json(funds_cur_path, funds)

    # add to summary
    run_summary["fundamentals"] = {
        "tickers": FUND_TICKERS,
        "file_run": funds_run_path,
        "file_current": funds_cur_path,
    }

    # Save summary to both locations
    write_json(os.path.join(run_dir, "summary.json"), run_summary)
    write_json(os.path.join(current_dir, "summary.json"), run_summary)

    # Auto-commit & push (triggers purge workflow)
    git_commit_and_push(
        repo_root=base_dir,
        run_dir=run_dir,
        current_dir=current_dir,
        branch="main"
    )


    print("üèÅ Done.")
    if run_summary["errors"]:
        print("Some symbols failed (see summary.json).")
    else:
        print("All symbols fetched successfully.")
    print(f"Latest 'current' folder ready for Qualtrics: {current_dir}")

if __name__ == "__main__":
    main()

üìÅ Daily archive folder: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/runs/run_2025-10-20
üìÇ Current folder for Qualtrics: /Users/paulgrass/Library/Mobile Documents/com~apple~CloudDocs/Documents/Programming/Git/pilot3-asset-data/current_day2
‚è≥ Fetching 365-day data for CSCO‚Ä¶
 ‚úÖ Wrote csco_365d.json (249 points)
‚è≥ Fetching 365-day data for MSFT‚Ä¶
 ‚úÖ Wrote msft_365d.json (249 points)


KeyboardInterrupt: 