In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

In [150]:
TICKERS = ["NVDA", "TSLA"]
START = "2016-01-01"
END = "2024-12-31"
FREQ = "1d"
SWEEP_MAX = 30
SWEEP_MIN = 5
TRAIN_LEN = "730d"
TEST_LEN = "90d"
STEP = "90d"
TARGET = "Profit Ratio" # Sharpe or Profit Ratio (case sensitive)

def assert_prices_schema(df: pd.DataFrame):
    assert isinstance(df.index, pd.DatetimeIndex), "Index must be DateTime"
    assert df.index.is_monotonic_increasing, "Index must be increasing"
    assert not df[[c for c in ["Open", "Close", "High", "Low", "Volume"]]].le(
        0).any().any(), "Non-positive Prices Found"


def is_tidy(df: pd.DataFrame):
    assert isinstance(
        df.index, pd.DatetimeIndex) and "Ticker" in df.columns, "Dataframe is not tidy"


def download_ledger(tickers = TICKERS, start=START, end = END, freq="1d"):
    temp = {}
    for t in tickers:
        try:
            temp[t] = yf.download(
                t, start=start, end = end, interval=freq, multi_level_index=False)
        except Exception as e:
            print(f"{e} failed to download")
    out = pd.concat(temp, axis=1)
    out = out.stack(level=0, future_stack=True).reset_index().rename(
        columns={"level_1": "Ticker"})
    out.set_index("Date", inplace=True)
    return out

def add_returns(df: pd.DataFrame):
    out = df.copy()
    return out.assign(
        Ret=lambda d: d.groupby("Ticker")["Close"].pct_change().fillna(0.0),
    )
    

def assert_no_cheating(df: pd.DataFrame):
    pass


def breakout(df: pd.DataFrame, lookback: int = None):
    out = df.copy()
    return out.assign(
        Upper=lambda d: d["Close"].rolling(window=lookback, min_periods=lookback).max().shift(1),
        Lower=lambda d: d["Close"].rolling(window=lookback, min_periods=lookback).min().shift(1),
        Signal=lambda d: (d["Close"].gt(d["Upper"]).astype(
            int) - d["Close"].lt(d["Lower"]).astype(int)),
        Position=lambda d: d["Signal"].shift(1).fillna((0.0)),
        Strat_Ret =lambda d: (d["Ret"] * d["Position"]),
        Equity = lambda d: (1 + d["Strat_Ret"]).cumprod()
    )

def ann_sharpe(sr):
    sr = sr["Strat_Ret"] if isinstance(sr, pd.DataFrame) else pd.Series(sr)
    sr = sr.dropna()
    out = (sr.mean()/sr.std(ddof=1) * np.sqrt(252))
    return out

def ann_pf(sr):
    sr = sr["Strat_Ret"] if isinstance(sr, pd.DataFrame) else pd.Series(sr)
    sr = sr.dropna()
    pos = sr[sr>0].sum()
    neg = sr[sr<0].sum()
    out = pos/(np.abs(neg) +1e-12)
    return out

def optimize_breakout(df: pd.DataFrame, sweep_max=SWEEP_MAX, sweep_min=SWEEP_MIN):
    best_lookback = 0
    best_sharpe = -np.inf
    best_pf = -np.inf
    if TARGET == "Sharpe":
        for i in range(sweep_min, sweep_max+1):
            out = breakout(df, lookback=i)
            sharpe = ann_sharpe(out)
            if sharpe > best_sharpe:
                best_sharpe = sharpe
                best_lookback = i
        return pd.Series({"Best Lookback" : best_lookback, "Best Sharpe" : best_sharpe})
        
    if TARGET == "Profit Ratio":
        for i in range(sweep_min, sweep_max+1):
            out = breakout(df, lookback=i)
            pf = ann_pf(out)
            if pf > best_pf:
                best_pf = pf
                best_lookback = i
        return pd.Series({"Best Lookback" : best_lookback, "Best Profit Ratio" : best_pf})
        


def test_strategy(group : pd.DataFrame, bestL_map):
    t = group["Ticker"].iat[0]
    L = int(bestL_map[t])
    result = breakout(group, L)
    if TARGET == "Sharpe":
        Sharpe = ann_sharpe(result)
        return pd.Series({"Lookback Used" : L, "Sharpe" : Sharpe})
    if TARGET == "Profit Ratio":
        pf = ann_pf(result)
        return pd.Series({"Lookback Used" : L, "Profit Ratio" : pf})

def make_splits(dates, train_len = TRAIN_LEN, test_len = TEST_LEN, step = STEP):
    dates = pd.DatetimeIndex(dates).sort_values().unique()
    if dates.size == 0:
        return []
    
    train_len = pd.to_timedelta(train_len)
    test_len = pd.to_timedelta(test_len)
    step = pd.to_timedelta(step)

    first_anchor = dates.min() + train_len
    last_anchor = dates.max() - test_len
    if first_anchor > last_anchor:
        return []
    
    splits = []
    anchor = first_anchor

    while anchor <= last_anchor:
        train_start = anchor - train_len
        train_end = anchor
        test_start = anchor
        test_end = anchor + test_len

        splits.append({
            "train_start" : train_start,
            "train_end" : train_end, 
            "test_start" : test_start,
            "test_end" : test_end
        })

        anchor = anchor + step

    return splits

def walk_forward(df: pd.DataFrame):
    global_index = df.index
    splits = make_splits(global_index)
    oos = []
    for w_id, w in enumerate(splits):
        df_train = df.loc[w["train_start"] : w["train_end"]]
        df_test =df.loc[w["test_start"] : w["test_end"]]
        train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
        bestL_map = dict(zip(train_results["Ticker"], train_results["Best Lookback"]))
        test_results = df_test.groupby("Ticker").apply(test_strategy, bestL_map).reset_index()
        merged = (train_results.merge(test_results[["Ticker", "Lookback Used", TARGET]], on = "Ticker", how = "inner")).assign(
            window_id = w_id,
            train_start = w["train_start"],
        )
        merged["Diff"] = (merged[f"Best {TARGET}"] - merged[TARGET]).abs()
        oos.append(merged)

    summary = pd.concat(oos)
    return summary

df = download_ledger(TICKERS, START, END, FREQ).pipe(add_returns)
is_tidy(df)

results = walk_forward(df)

results

  temp[t] = yf.download(
[*********************100%***********************]  1 of 1 completed
  temp[t] = yf.download(
[*********************100%***********************]  1 of 1 completed
  train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
  test_results = df_test.groupby("Ticker").apply(test_strategy, bestL_map).reset_index()
  train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
  test_results = df_test.groupby("Ticker").apply(test_strategy, bestL_map).reset_index()
  train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
  test_results = df_test.groupby("Ticker").apply(test_strategy, bestL_map).reset_index()
  train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
  test_results = df_test.groupby("Ticker").apply(test_strategy, bestL_map).reset_index()
  train_results = df_train.groupby("Ticker").apply(optimize_breakout).reset_index()
  test_results = df_test.groupby("Ti

Unnamed: 0,Ticker,Best Lookback,Best Profit Ratio,Lookback Used,Profit Ratio,window_id,train_start,Diff
0,NVDA,26.0,1.443796,26.0,0.130068,0,2016-01-04,1.313729
1,TSLA,20.0,1.608803,20.0,0.968428,0,2016-01-04,0.640375
0,NVDA,26.0,1.279657,26.0,0.259114,1,2016-04-03,1.020543
1,TSLA,19.0,1.280757,19.0,1.034373,1,2016-04-03,0.246384
0,NVDA,26.0,1.18179,26.0,1.384826,2,2016-07-02,0.203036
1,TSLA,19.0,1.340111,19.0,0.552324,2,2016-07-02,0.787787
0,NVDA,26.0,1.125869,26.0,2.026469,3,2016-09-30,0.9006
1,TSLA,19.0,1.207758,19.0,0.252901,3,2016-09-30,0.954858
0,NVDA,5.0,1.190225,5.0,0.578455,4,2016-12-29,0.61177
1,TSLA,16.0,1.009253,16.0,1.190085,4,2016-12-29,0.180832
