In [None]:
# transformer_forecast.py
# Transformer Encoder for 13-step ahead forecasting on OHLC data.
# Usage:
#   python transformer_forecast.py --train train.csv --test test.csv --output submission.csv
# Optional:
#   --seq_len 90 --d_model 256 --nhead 8 --num_layers 6 --epochs 120
#   --cpu  (force CPU even if CUDA is available)

In [68]:
import argparse
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn.preprocessing import StandardScaler

HORIZON = 13

In [69]:
# --- Датасет ---
class WindowDataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray, seq_len: int):
        self.X = X
        self.y = y
        self.seq_len = seq_len

    def __len__(self):
        return max(0, self.X.shape[0] - self.seq_len - HORIZON + 1)

    def __getitem__(self, idx):
        x_seq = self.X[idx: idx + self.seq_len]
        y_vec = self.y[idx + self.seq_len: idx + self.seq_len + HORIZON]
        return torch.from_numpy(x_seq).float(), torch.from_numpy(y_vec).float()


In [70]:
# ---------- Фичи ----------
def add_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    out["range_hl"] = out["high"] - out["low"]
    out["close_shift1"] = out["close"].shift(1)
    out["ret1"] = np.log(out["close"] / out["close_shift1"]).replace([np.inf, -np.inf], 0.0).fillna(0.0)
    out["dow"] = out["dt"].dt.dayofweek
    out["dow_sin"] = np.sin(2*np.pi*out["dow"]/7.0)
    out["dow_cos"] = np.cos(2*np.pi*out["dow"]/7.0)
    for w in [3, 5, 7, 13]:
        out[f"sma_{w}"] = out["close"].rolling(w, min_periods=1).mean()
    out = out.drop(columns=["close_shift1"])
    return out

In [71]:
# --- Модель ---
class LSTMForecast(nn.Module):
    def __init__(self, input_dim, hidden_dim=16, num_layers=1, horizon=13):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=0.0)
        self.fc = nn.Linear(hidden_dim, horizon)

    def forward(self, x):
        _, (h, _) = self.lstm(x)
        h = h[-1]
        return self.fc(h)

In [75]:
# --- Функции подготовки ---
def make_arrays(df, feature_cols, target_col,
                feat_scaler=None, target_scaler=None):
    X = df[feature_cols].values.astype(np.float32)
    y = df[target_col].values.astype(np.float32).reshape(-1, 1)

    if feat_scaler is None:
        feat_scaler = StandardScaler()
        X = feat_scaler.fit_transform(X)
    else:
        X = feat_scaler.transform(X)

    if target_scaler is None:
        target_scaler = StandardScaler()
        y = target_scaler.fit_transform(y)
    else:
        y = target_scaler.transform(y)

    return X, y.squeeze(-1), feat_scaler, target_scaler


def predict_last_window(model, X, seq_len, device):
    x_seq = torch.from_numpy(X[-seq_len:]).unsqueeze(0).float().to(device)
    with torch.no_grad():
        pred = model(x_seq).cpu().numpy().ravel()
    return pred

In [82]:
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() and not args.cpu else "cpu")
    print(f"Device: {device}")

    # данные
    train_df = pd.read_csv("train.csv")
    test_df = pd.read_csv("test.csv")

    # --- преобразуем дату ---
    date_col = "dt"

    for df in [train_df, test_df]:
        # Преобразуем колонку dt в datetime
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')  # некорректные даты станут NaT

        # Проверим, что преобразование прошло успешно
        if df[date_col].isna().any():
            print("В колонке dt есть некорректные даты!")

        # Создаём признаки из даты
        df["year"] = df[date_col].dt.year
        df["month"] = df[date_col].dt.month
        df["day"] = df[date_col].dt.day
        df["weekday"] = df[date_col].dt.weekday

    # Исключаем колонку даты из признаков
    feature_cols = [c for c in train_df.columns if c not in ["close", date_col]]
    target_col = "close"

    X_all, y_all, feat_scaler, target_scaler = make_arrays(train_df, feature_cols, "close")

    # проверяем достаточно ли данных
    min_required = args.seq_len + HORIZON
    if len(train_df) < min_required:
        new_seq_len = max(5, len(train_df) - HORIZON - 1)
        print(f"⚠️ Данных мало ({len(train_df)}). seq_len уменьшаем до {new_seq_len}")
        args.seq_len = new_seq_len

    # делим train/val
    dataset = WindowDataset(X_all, y_all, args.seq_len)
    n_total = len(dataset)
    n_val = max(1, int(n_total * args.val_ratio))
    n_train = max(1, n_total - n_val)
    train_ds, val_ds = random_split(dataset, [n_train, n_val])

    print(f"Train size: {len(train_ds)}, Test size: {len(val_ds)}")

    train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=args.batch_size)

    # модель
    model = LSTMForecast(len(feature_cols), hidden_dim=args.hidden_dim,
                         num_layers=args.num_layers, horizon=HORIZON).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=args.lr)
    crit = nn.MSELoss()

    # обучение
    best_val = float("inf")
    patience = args.patience

    for epoch in range(1, args.epochs + 1):
        model.train()
        train_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            pred = model(xb)
            loss = crit(pred, yb)
            loss.backward()
            opt.step()
            train_losses.append(loss.item())

        val_losses = []
        model.eval()
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                pred = model(xb)
                loss = crit(pred, yb)
                val_losses.append(loss.item())

        train_loss = np.mean(train_losses) if train_losses else 0.0
        val_loss = np.mean(val_losses) if val_losses else float("inf")

        if val_loss < best_val:
            best_val = val_loss
            patience = args.patience
            torch.save(model.state_dict(), "best_model.pt")
        else:
            patience -= 1

        print(f"Epoch {epoch:03d} | train_loss={train_loss:.6f} | "
              f"val_loss={val_loss:.6f} | best_val={best_val:.6f} | patience={patience}")

        if patience == 0:
            print("Ранняя остановка.")
            break

    # загрузка лучшей модели
    model.load_state_dict(torch.load("best_model.pt"))

    # предсказания для последнего окна
    preds_scaled = predict_last_window(model, X_all, args.seq_len, device)
    preds = target_scaler.inverse_transform(preds_scaled.reshape(-1, 1)).ravel()

    # сабмит
    submission = pd.DataFrame({"id": np.arange(1, len(preds) + 1),
                               "close": preds})
    submission.to_csv("submission.csv", index=False)
    print("Сабмит сохранён в submission.csv")

In [83]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--seq_len", type=int, default=20)
    parser.add_argument("--hidden_dim", type=int, default=16)
    parser.add_argument("--num_layers", type=int, default=1)
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--val_ratio", type=float, default=0.2)
    parser.add_argument("--patience", type=int, default=20)
    parser.add_argument("--cpu", action="store_true")
    args = parser.parse_args()

    main(args)

Device: cpu
Train size: 25, Test size: 6
Epoch 001 | train_loss=1.079144 | val_loss=0.801465 | best_val=0.801465 | patience=20
Epoch 002 | train_loss=0.894695 | val_loss=0.790285 | best_val=0.790285 | patience=20
Epoch 003 | train_loss=0.825249 | val_loss=0.778576 | best_val=0.778576 | patience=20
Epoch 004 | train_loss=1.223972 | val_loss=0.767093 | best_val=0.767093 | patience=20
Epoch 005 | train_loss=0.826729 | val_loss=0.754578 | best_val=0.754578 | patience=20
Epoch 006 | train_loss=1.005680 | val_loss=0.740771 | best_val=0.740771 | patience=20
Epoch 007 | train_loss=0.967917 | val_loss=0.725271 | best_val=0.725271 | patience=20
Epoch 008 | train_loss=1.071183 | val_loss=0.708217 | best_val=0.708217 | patience=20
Epoch 009 | train_loss=0.743105 | val_loss=0.689545 | best_val=0.689545 | patience=20
Epoch 010 | train_loss=0.905349 | val_loss=0.670112 | best_val=0.670112 | patience=20
Epoch 011 | train_loss=0.981699 | val_loss=0.647173 | best_val=0.647173 | patience=20
Epoch 012 | t