In [1]:
# Project root & imports
import os, sys
from pathlib import Path

# The notebook is expected to run from a sub-folder of the project, so the
# parent of the working directory is treated as the project root and is made
# importable (appended once, even if the cell is re-run).
project_root = Path(os.pardir).resolve()
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.append(str(project_root))

print("Project root:", project_root)


Project root: C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter


In [2]:
# Import config + helpers
from datetime import datetime, timezone

import pandas as pd

from src.config_symbols import PORTFOLIOS
from src.fetch_data import fetch_bars, merge_and_save
from src.process_data import save_returns_only
from src.logging_utils import get_logger
from src.audit_utils import record_data_run

In [3]:
# Choose portfolio & parameters
PORTFOLIO_NAME = "all"     # or "etf", "tech", "defensive"
SYMBOLS = PORTFOLIOS[PORTFOLIO_NAME]

START_DATE = "2018-01-01"
TIMEFRAME = "1Day"

# Output folders live directly under the project root; create them up front
# so later cells can write without checking.
DATA_DIR = project_root / "data"
LOGS_DIR = project_root / "logs"
DATA_DIR.mkdir(exist_ok=True)
LOGS_DIR.mkdir(exist_ok=True)

logger = get_logger("portfolio_prep", LOGS_DIR / "fetch_data.log")

# Run ID ties log lines + audit row together.
# datetime.utcnow() is deprecated (it emitted a DeprecationWarning in this
# notebook's output). Take an aware UTC "now" and drop tzinfo so started_at
# remains a naive UTC datetime, exactly the kind of value utcnow() produced.
started_at = datetime.now(timezone.utc).replace(tzinfo=None)
run_id = started_at.strftime("prep_%Y%m%dT%H%M%SZ")

print("Run ID:", run_id)
logger.info(f"[{run_id}] Starting portfolio prep for '{PORTFOLIO_NAME}' with {len(SYMBOLS)} symbols.")



Run ID: prep_20251111T022542Z


  started_at = datetime.utcnow()


In [4]:
# Loop: fetch prices for every symbol and merge them into its per-symbol CSV.
# NOTE(review): `save_returns_only` is imported but never called in this cell,
# so no returns file appears to be written here — confirm whether that step
# is missing.

# Lowercase alias of DATA_DIR (already created in the config cell), retained
# in case other cells refer to it — TODO confirm and remove if unused.
data_dir = DATA_DIR

n_success = 0
n_failed = 0

for sym in SYMBOLS:
    print(f"=== {sym} ===")
    logger.info(f"[{run_id}] Start symbol={sym}")

    try:
        # Use the config-cell parameters instead of repeating the literals,
        # so changing START_DATE / TIMEFRAME actually changes the fetch.
        df_new = fetch_bars(symbol=sym, start=START_DATE, end=None, timeframe=TIMEFRAME)
        logger.info(f"[{run_id}] Fetched {len(df_new)} new rows for {sym}")
    except Exception as e:
        logger.exception(f"[{run_id}] Error fetching {sym}: {e}")
        print(f"  Error fetching {sym}: {e}")
        n_failed += 1
        continue

    if df_new.empty:
        logger.warning(f"[{run_id}] No data returned for {sym}. Skipping.")
        print(f"  No data returned for {sym}. Skipping.")
        n_failed += 1
        continue

    # File name follows the configured timeframe (identical to the previous
    # "<sym>_1Day.csv" while TIMEFRAME == "1Day").
    outfile = DATA_DIR / f"{sym}_{TIMEFRAME}.csv"
    try:
        merged = merge_and_save(df_new, outfile, logger)
    except Exception as e:
        # A failed write for one symbol is counted and skipped, mirroring the
        # fetch error handling, so the remaining symbols are still processed
        # and finished_at is still set for the audit cell.
        logger.exception(f"[{run_id}] Error saving {sym}: {e}")
        print(f"  Error saving {sym}: {e}")
        n_failed += 1
        continue

    logger.info(f"[{run_id}] Saved {len(merged)} total rows for {sym} → {outfile}")
    print(f"  Done: total rows in {outfile}: {len(merged)}")
    n_success += 1

# Naive UTC timestamp, obtained without the deprecated datetime.utcnow().
finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
logger.info(f"[{run_id}] Portfolio prep finished. success={n_success}, failed={n_failed}")
print(f"Run complete. success={n_success}, failed={n_failed}")


=== AAPL ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\AAPL_1Day.csv: 1976
=== AMZN ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\AMZN_1Day.csv: 1976
=== DIA ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\DIA_1Day.csv: 1976
=== GOOGL ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\GOOGL_1Day.csv: 1976
=== HD ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\HD_1Day.csv: 1976
=== IWM ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\IWM_1Day.csv: 1976
=== JNJ ===
  Done: total rows in C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter\data\JNJ_1Day.csv

  finished_at = datetime.utcnow()


In [5]:
# Record this run in the audit log: the run identifier, the portfolio and its
# symbols, the success/failure counts from the fetch loop, and the run's
# start/finish timestamps.
audit_fields = {
    "run_id": run_id,
    "portfolio": PORTFOLIO_NAME,
    "symbols": SYMBOLS,
    "n_success": n_success,
    "n_failed": n_failed,
    "started_at": started_at,
    "finished_at": finished_at,
}
record_data_run(**audit_fields)


✅ Audit updated → reports\data_audit_log.csv
