In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST
import logging
import os
# Point both proxy environment variables at the same HTTP proxy.
os.environ.update(
    dict.fromkeys(("HTTP_PROXY", "HTTPS_PROXY"), "http://proxy.isoad.isogmbh.de:81")
)

tickers = ['AAPL', 'SPY', 'MSFT', 'NDX']
# NDX is an index: volume=0, and High/Low/Open == Close (no intraday range)
tickers_with_vol  = ['AAPL', 'SPY', 'MSFT']
tickers_with_ohlc = ['AAPL', 'SPY', 'MSFT']

period = {'start': '2021-06-01', 'end': '2025-01-01'}
df_raw_multi = yf.download(tickers, start=period['start'], end=period['end'])
if df_raw_multi.empty:
    # Stop here with a clear message instead of building an empty training set.
    raise RuntimeError(f"No price data downloaded for {tickers} in {period}")


def _finite_or_nan(frame):
    """Return *frame* with +/-inf replaced by NaN so a later ``dropna()`` removes them."""
    return frame.replace([np.inf, -np.inf], np.nan)


def _to_long(wide, ticker, suffix):
    """Build the long (ds, unique_id, y) frame for the *ticker* column of *wide*.

    Rows containing NaN are dropped; the series is labelled ``{ticker}_{suffix}``.
    """
    return pd.DataFrame({
        'ds':        wide.index,
        'unique_id': f'{ticker}_{suffix}',
        'y':         wide[ticker],
    }).dropna()


# Day-over-day log changes. A zero value (e.g. volume=0) makes the log
# non-finite: errstate silences the NumPy warnings and the infinities are
# converted to NaN, because dropna() alone would keep +/-inf in the targets.
with np.errstate(divide='ignore', invalid='ignore'):
    df_close_log = _finite_or_nan(np.log(df_raw_multi['Close'] / df_raw_multi['Close'].shift(1)))
    df_open_log  = _finite_or_nan(np.log(df_raw_multi['Open']  / df_raw_multi['Open'].shift(1)))
    df_high_log  = _finite_or_nan(np.log(df_raw_multi['High']  / df_raw_multi['High'].shift(1)))
    df_low_log   = _finite_or_nan(np.log(df_raw_multi['Low']   / df_raw_multi['Low'].shift(1)))
    df_vol_log   = _finite_or_nan(np.log(df_raw_multi['Volume']).diff())

# PatchTST does not support hist_exog_list — each feature is a separate unique_id
ohlc_features = [('open', df_open_log), ('high', df_high_log), ('low', df_low_log)]
df_list = []
for ticker in tickers:
    # Per ticker the order is: price, then open/high/low, then volume.
    df_list.append(_to_long(df_close_log, ticker, 'price'))
    if ticker in tickers_with_ohlc:
        df_list.extend(_to_long(wide, ticker, suffix) for suffix, wide in ohlc_features)
    if ticker in tickers_with_vol:
        df_list.append(_to_long(df_vol_log, ticker, 'vol'))

df_train_multi = pd.concat(df_list).reset_index(drop=True)
print(df_train_multi['unique_id'].value_counts())
print(df_train_multi.head(8))

In [None]:
import optuna
from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST
from neuralforecast.losses.pytorch import MAE
from pathlib import Path

N_FOLDS = 4
FOLD_STEP = 20  # trading days between fold cutoffs

# The study id combines the ticker list with the download period; it names
# the sub-directory that receives this run's report files.
study_id = f"{'_'.join(tickers)}.{period['start']}.{period['end']}"
report_dir = "optuna_report"

rdir = Path(report_dir) / study_id
rdir.mkdir(parents=True, exist_ok=True)

# Keep a copy of the exact training data next to the reports.
df_train_multi.to_csv(rdir / "traindata.csv", sep=",", header=True, index=False)


def _suggest_patchtst_params(trial):
    """Sample one set of PatchTST hyperparameters from *trial*."""
    return {
        'input_size': trial.suggest_int('input_size', 30, 120, step=10),
        'patch_len': trial.suggest_categorical('patch_len', [4, 8, 16]),
        'stride': trial.suggest_categorical('stride', [2, 4, 8]),
        'encoder_layers': trial.suggest_int('encoder_layers', 1, 3),
        'n_heads': trial.suggest_categorical('n_heads', [2, 4, 8]),
        'hidden_size': trial.suggest_categorical('hidden_size', [32, 64, 128]),
        'linear_hidden_size': trial.suggest_categorical('linear_hidden_size', [64, 128, 256]),
        'dropout': trial.suggest_float('dropout', 0.0, 0.3),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-3, log=True),
    }


def _direction_hit_rate(cv, series_id, model_col='PatchTST'):
    """Return the share of *series_id* rows whose forecast sign equals the actual sign."""
    if 'unique_id' not in cv.columns:
        # NOTE(review): presumably some neuralforecast releases return
        # unique_id as the index rather than a column — confirm.
        cv = cv.reset_index()
    rows = cv[cv['unique_id'] == series_id]
    if rows.empty:
        # An empty selection would otherwise yield a NaN hit rate.
        raise ValueError(f"No cross-validation rows for series {series_id!r}")
    return float((np.sign(rows['y']) == np.sign(rows[model_col])).mean())


def objective(trial):
    """Optuna objective: mean directional hit rate of PatchTST on AAPL_price.

    Reads the module-level ``df_train_multi``, ``N_FOLDS`` and ``FOLD_STEP``.
    For each of the ``N_FOLDS`` cutoffs (spaced ``FOLD_STEP`` dates apart,
    counted back from the end of the data) the data up to the cutoff is
    cross-validated and the hit rate is reported to *trial* for pruning.

    Args:
        trial: The Optuna trial used to sample hyperparameters and report
            intermediate values.

    Returns:
        The mean of the per-fold hit rates as a float.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner stops the trial.
        ValueError: If there are fewer distinct dates than
            ``FOLD_STEP * N_FOLDS`` or no AAPL_price rows are forecast.
    """
    params = _suggest_patchtst_params(trial)

    all_dates = np.sort(df_train_multi['ds'].unique())
    n_total = len(all_dates)
    if n_total < FOLD_STEP * N_FOLDS:
        # A negative index would silently wrap around to a late date.
        raise ValueError(
            f"Need at least {FOLD_STEP * N_FOLDS} distinct dates, got {n_total}"
        )

    fold_hit_rates = []
    for fold in range(N_FOLDS):
        cutoff = all_dates[n_total - FOLD_STEP * (N_FOLDS - fold)]
        df_fold = df_train_multi[df_train_multi['ds'] <= cutoff]

        # Build a fresh model for every fold so that no fitted state is
        # carried from one fold into the next.
        model = PatchTST(
            h=7,
            **params,
            max_steps=500,
            val_check_steps=500,
            early_stop_patience_steps=-1,
            accelerator='gpu',
            devices=1,
            enable_progress_bar=False,
        )
        # NOTE(review): the data are trading days while freq='D' — confirm
        # this is the intended frequency.
        nf = NeuralForecast(models=[model], freq='D')
        cv = nf.cross_validation(df=df_fold, n_windows=3, step_size=1, refit=False)

        # Evaluate only on AAPL_price — that's our trading target
        hit_rate = _direction_hit_rate(cv, 'AAPL_price')
        fold_hit_rates.append(hit_rate)

        trial.report(hit_rate, step=fold)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return float(np.mean(fold_hit_rates))


# Median pruning: skipped for the first 5 trials and for step 0 of each trial.
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=1)

# higher hit rate = better
study = optuna.create_study(direction='maximize', pruner=median_pruner)
study.optimize(objective, n_trials=50, timeout=3600)

# study.best_params / study.best_value raise ValueError when no trial has
# completed; fall back to None so the reports below are still written.
try:
    best_params, best_value = study.best_params, study.best_value
except ValueError:
    best_params, best_value = None, None

print("Tickers:", tickers)
print("Period:", period)
print("Best params:   ", best_params)
print("Best hit rate: ", best_value)

# Sort once and reuse the ranked table for both the preview and the CSV.
df_trials = study.trials_dataframe()
df_trials_ranked = df_trials.sort_values('value', ascending=False)
print(df_trials_ranked.head(10))

# Save report
with open(rdir / "optuna_summary.txt", "w", encoding="utf-8") as reportfile:
    print("Tickers:",    tickers,     file=reportfile)
    print("Period:",     period,      file=reportfile)
    print("Best params:", best_params, file=reportfile)
    print("Best hit rate:", best_value, file=reportfile)

df_trials_ranked.to_csv(rdir / "optuna_trials.csv", index=False)