In [1]:
# Add project root to sys.path
import os, sys
from pathlib import Path

# Absolute path of the directory one level above the current working directory.
project_root = Path("..").resolve()

# Register it for imports only when absent, so re-running the cell never
# stacks duplicate entries on sys.path.
if sys.path.count(str(project_root)) == 0:
    sys.path.append(str(project_root))

print("Project root:", project_root)

Project root: C:\Users\ng4bo\OneDrive\Desktop\Alpaca Project\alpaca-bot-starter\alpaca-bot-starter


In [2]:
# Core imports
import pandas as pd
import numpy as np
from pathlib import Path

import matplotlib.pyplot as plt
from pmdarima import auto_arima

from src.config_symbols import PORTFOLIOS


ModuleNotFoundError: No module named 'pmdarima'

In [None]:
# Portfolio selection and shared settings for the tuning run.
# Pick the portfolio to tune; the options noted here are
# "all", "etf", "tech" and "defensive".
PORTFOLIO_NAME = "all"
N_TEST = 100  # size of the hold-out tail: the last N_TEST observations

# Tickers registered under the chosen portfolio key.
SYMBOLS = PORTFOLIOS[PORTFOLIO_NAME]

# Input and output folders, both anchored at the project root.
DATA_DIR = project_root.joinpath("data")
REPORTS_DIR = project_root.joinpath("reports")
REPORTS_DIR.mkdir(exist_ok=True)  # no error if the folder is already there

# Echo the selection as the cell output.
SYMBOLS

In [None]:
# Helper functions: load returns & tune one symbol

def load_returns(
    symbol: str,
    timeframe: str = "1Day",
    data_dir: "Path | str | None" = None,
) -> pd.Series:
    """Load returns for a symbol from *_returns_only.csv.

    Parameters
    ----------
    symbol : str
        Ticker used as the file-name prefix.
    timeframe : str, default "1Day"
        Middle part of the file name (``{symbol}_{timeframe}_returns_only.csv``).
    data_dir : path-like, optional
        Directory that holds the CSV files. When omitted, the notebook-level
        ``DATA_DIR`` is used, which is exactly the previous behaviour.

    Returns
    -------
    pd.Series
        The ``return`` column ordered by the ``ts`` column, with missing
        returns dropped and a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If the expected CSV file does not exist.
    """
    # Only fall back to the global when the caller did not supply a folder,
    # so the helper can also be used outside this notebook's configuration.
    base_dir = Path(DATA_DIR if data_dir is None else data_dir)
    path = base_dir / f"{symbol}_{timeframe}_returns_only.csv"

    if not path.exists():
        raise FileNotFoundError(f"Missing file for {symbol}: {path}")

    df = pd.read_csv(path, parse_dates=["ts"])
    # Chronological order first, then discard rows without a return value;
    # the index is rebuilt so later positional train/test splits are simple.
    df = df.sort_values("ts").dropna(subset=["return"]).reset_index(drop=True)
    return df["return"]


def tune_symbol(symbol: str, n_test: int = 100) -> dict:
    """
    Use auto_arima to find best (p,d,q) for a symbol's returns,
    and compute out-of-sample RMSE on the last n_test points.

    Parameters
    ----------
    symbol : str
        Ticker whose returns are loaded via ``load_returns``.
    n_test : int, default 100
        Number of trailing observations held out for evaluation.
        Must be at least 1.

    Returns
    -------
    dict
        Keys: ``symbol``, ``order`` (the selected model's ``order``
        attribute), ``rmse``, ``train_len`` and ``test_len``.

    Raises
    ------
    ValueError
        If ``n_test`` is not positive, or the series has no more than
        ``n_test + 10`` observations.
    """
    # With n_test == 0 the slice [:-0] is empty and [-0:] is the whole series,
    # so the split below would silently train on nothing.
    if n_test < 1:
        raise ValueError(f"n_test must be a positive integer (got {n_test})")

    series = load_returns(symbol)

    if len(series) <= n_test + 10:
        raise ValueError(f"Not enough data for {symbol} (len={len(series)})")

    # Positional split: everything but the tail for fitting, the tail for scoring.
    train = series.iloc[:-n_test]
    test = series.iloc[-n_test:]

    # We already know returns tend to be stationary from ADF, so start with d=0.
    model = auto_arima(
        train,
        start_p=0, max_p=3,
        start_q=0, max_q=3,
        d=0,
        seasonal=False,
        stepwise=True,
        error_action="ignore",
        suppress_warnings=True,
        information_criterion="aic",
    )

    # Depending on the pmdarima version, predict() may hand back a pandas
    # Series that carries its own index. Passing a Series to pd.Series(...,
    # index=...) reindexes by label instead of relabelling, which can yield
    # NaN forecasts. Strip to a plain array first so values are matched to the
    # test observations strictly by position.
    forecast_values = np.asarray(model.predict(n_periods=len(test)))
    preds = pd.Series(forecast_values, index=test.index)

    rmse = float(np.sqrt(((preds - test) ** 2).mean()))

    return {
        "symbol": symbol,
        "order": model.order,
        "rmse": rmse,
        "train_len": int(len(train)),
        "test_len": int(len(test)),
    }


In [None]:
# Run tuning across the portfolio

results = []

for sym in SYMBOLS:
    print(f"Tuning {sym}...")
    try:
        info = tune_symbol(sym, n_test=N_TEST)
        info["portfolio"] = PORTFOLIO_NAME
        results.append(info)
        print(f"  Done: order={info['order']}, rmse={info['rmse']:.6f}")
    except Exception as e:
        # Deliberate best-effort: one failing symbol must not abort the run,
        # so the error is reported and the loop moves on.
        print(f"  Error for {sym}: {e}")

if results:
    # Best (lowest RMSE) models first.
    results_df = pd.DataFrame(results).sort_values("rmse").reset_index(drop=True)
else:
    # Every symbol failed: a frame built from an empty list has no "rmse"
    # column and sort_values would raise KeyError. Build an empty frame with
    # the expected columns so this cell and the following ones still run.
    results_df = pd.DataFrame(
        columns=["symbol", "order", "rmse", "train_len", "test_len", "portfolio"]
    )

results_df


In [None]:
# Save the tuning results per portfolio
# One CSV per portfolio name, written into the reports folder without the
# DataFrame index column.
out_path = REPORTS_DIR / f"arima_tuning_{PORTFOLIO_NAME}.csv"
results_df.to_csv(out_path, index=False)
# The arrow in this message had been stored as mis-decoded bytes; restored to "→".
print(f"Saved tuning results → {out_path}")
