In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
TradeGPT — Transformer for Next‑Candle Prediction + Simple Backtester (No CLI)
=============================================================================

What this script does
---------------------
- Builds a **Transformer Encoder** to predict the **next candle** from recent OHLCV windows.
- Two heads:
  1) **Direction (classification):** probability next close > current close.
  2) **Return (regression):** next‑close % change.
- Chronological train/val/test split, leakage‑safe scaling.
- **Simple backtester** on the test set that converts predictions ➜ positions ➜ equity.
- Saves artifacts in `outputs/YYYY-MM-DD/HHMMSS_SYMBOL_INTERVAL/`:
  - `metrics.json` (test metrics), `model.pt` (weights + scaler stats)
  - `inference.json` (latest-window prediction)
  - `trades.csv`, `equity_curve.csv` (backtest results)

Quick start
-----------
1) Install: `pip install torch pandas numpy scikit-learn yfinance`
2) Open this file and adjust the `Config` block below (symbol/interval/thresholds/costs).
3) Run: `python tradegpt_transformer.py` (no command‑line args needed).

Backtester logic (default)
--------------------------
- At bar *t* (end of window), generate `prob_up` and `next_pct_pred` for *t+1*.
- **Signal:**
  - Long if `prob_up >= long_threshold` (default 0.55)
  - Short if `prob_up <= short_threshold` (default 0.45)
  - Else Flat
- **Execution:** enter at close[t] and exit at close[t+1] (1‑bar hold). This aligns with a
  one‑step‑ahead prediction target. You can later swap to open[t+1] for stricter realism.
- **P&L:**
  - Long P&L% = (close[t+1]/close[t] - 1)
  - Short P&L% = -(close[t+1]/close[t] - 1)
  - Costs: applied per round‑trip on notional: `(cost_bps + slippage_bps)`
    (default 2 bps cost + 3 bps slippage = 5 bps; tune to your venue). 
  - Position size = `capital * position_fraction` (default 1.0 = fully invested on signal)
- **Annualization:** uses bars/day derived from `interval_to_bars_per_day`.

Notes & extensions
------------------
- Add causal masks if you extend to decoder‑style models; encoder here only sees past window.
- Try multi‑symbol training (symbol embeddings) and walk‑forward validation for production.
- To plug into your Groww pipeline, connect `trades.csv` to your execution layer with guards
  (max losers, daily stop, etc.).

"""

from __future__ import annotations
import os, math, json, time, datetime as dt, random
from dataclasses import dataclass
from typing import Tuple

import numpy as np
import pandas as pd

# yfinance optional; fallback to synthetic GBM so the script always runs
try:
    import yfinance as yf
    _HAS_YF = True
except Exception:
    _HAS_YF = False

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, mean_squared_error, mean_absolute_error

# -------------------------
# Reproducibility
# -------------------------
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# -------------------------
# Config (edit here)
# -------------------------
@dataclass
class Config:
    symbol: str = "RELIANCE.NS"
    interval: str = "1d"      # yfinance interval (1m, 5m, 15m, 1h, 1d)
    period: str = "730d"       # how far back to fetch
    seq_len: int = 64          # lookback window
    pred_horizon: int = 1      # predict t+1

    # Model
    d_model: int = 128
    nhead: int = 4
    num_layers: int = 3
    dim_feedforward: int = 256
    dropout: float = 0.1

    # Train
    batch_size: int = 128
    epochs: int = 20
    lr: float = 3e-4
    weight_decay: float = 1e-4
    warmup_steps: int = 100
    bce_pos_weight: float = 1.0

    # Split
    val_ratio: float = 0.15
    test_ratio: float = 0.15

    # Backtest
    initial_capital: float = 100_000.0
    position_fraction: float = 1.0        # 0..1 of capital deployed when in position
    long_threshold: float = 0.55          # prob up threshold
    short_threshold: float = 0.45         # prob down threshold
    cost_bps: float = 2.0                 # brokerage/taxes per round trip (basis points)
    slippage_bps: float = 3.0             # simulated slippage per round trip (basis points)

    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    out_dir: str = "outputs"

CFG = Config()

# -------------------------
# Data utilities
# -------------------------

def load_ohlcv(symbol: str, interval: str, period: str) -> pd.DataFrame:
    if _HAS_YF:
        try:
            df = yf.download(symbol, interval=interval, period=period, progress=False)
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = [c[0].capitalize() for c in df.columns]
            else:
                df.columns = [c.capitalize() for c in df.columns]
            df = df.rename(columns={"Adj Close": "Adj_Close"})
            df = df.dropna().copy()
            if len(df) > 0:
                return df
        except Exception:
            pass
    # Synthetic fallback (GBM)
    n = 10_000
    dt_step = 1/96  # ~15m
    mu, sigma = 0.12, 0.25
    prices = [100.0]
    for _ in range(n-1):
        dW = np.sqrt(dt_step) * np.random.randn()
        prices.append(prices[-1]*np.exp((mu-0.5*sigma**2)*dt_step + sigma*dW))
    prices = np.array(prices)
    high = prices * (1 + np.random.rand(n)*0.003)
    low = prices * (1 - np.random.rand(n)*0.003)
    open_ = prices * (1 + (np.random.rand(n)-0.5)*0.001)
    close = prices
    vol = np.random.lognormal(mean=12, sigma=0.5, size=n)
    idx = pd.date_range(end=pd.Timestamp.utcnow(), periods=n, freq="15min")
    return pd.DataFrame({"Open": open_, "High": high, "Low": low, "Close": close, "Volume": vol}, index=idx)


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    out["logret"] = np.log(out["Close"].pct_change().add(1))
    out["range"] = (out["High"] - out["Low"]) / out["Close"].shift(1)
    out["body"] = (out["Close"] - out["Open"]) / out["Open"]
    out["upper_tail"] = (out["High"] - out[["Open","Close"]].max(axis=1)) / out["Close"].shift(1)
    out["lower_tail"] = (out[["Open","Close"]].min(axis=1) - out["Low"]) / out["Close"]
    out["vol_z"] = (out["Volume"].replace(0, np.nan).rolling(96).apply(lambda x: (x[-1]-np.nanmean(x))/ (np.nanstd(x)+1e-9), raw=False))
    out = out.dropna().copy()
    return out


def make_supervised(df: pd.DataFrame, seq_len: int, pred_horizon: int):
    feats = ["Open","High","Low","Close","Volume","logret","range","body","upper_tail","lower_tail","vol_z"]
    arr = df[feats].values.astype(np.float32)
    close = df["Close"].values.astype(np.float32)
    X, y_cls, y_reg = [], [], []
    for i in range(seq_len, len(df)-pred_horizon):
        X.append(arr[i-seq_len:i])
        curr_close = close[i-1]
        next_close = close[i+pred_horizon-1]
        y_cls.append(1.0 if next_close > curr_close else 0.0)
        y_reg.append((next_close / curr_close) - 1.0)
    X = np.stack(X)
    y_cls = np.array(y_cls, dtype=np.float32)
    y_reg = np.array(y_reg, dtype=np.float32)
    idx = df.index[seq_len:len(df)-pred_horizon]
    return X, y_cls, y_reg, df.loc[idx]

# -------------------------
# Dataset / Loader
# -------------------------
class CandleDataset(Dataset):
    def __init__(self, X, y_cls, y_reg):
        self.X = X
        self.yc = y_cls
        self.yr = y_reg
    def __len__(self):
        return len(self.X)
    def __getitem__(self, i):
        return (
            torch.from_numpy(self.X[i]),
            torch.tensor(self.yc[i]).view(1),
            torch.tensor(self.yr[i]).view(1)
        )

# -------------------------
# Model
# -------------------------
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 10_000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10_000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class TradeGPT(nn.Module):
    def __init__(self, in_dim: int, d_model: int, nhead: int, num_layers: int, dim_feedforward: int, dropout: float):
        super().__init__()
        self.proj = nn.Linear(in_dim, d_model)
        self.pos = PositionalEncoding(d_model, dropout=dropout)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
            dropout=dropout, activation='gelu', batch_first=True, norm_first=True,
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)
        self.norm = nn.LayerNorm(d_model)
        self.cls_head = nn.Linear(d_model, 1)
        self.reg_head = nn.Linear(d_model, 1)
    def forward(self, x):
        h = self.proj(x)
        h = self.pos(h)
        h = self.encoder(h)
        h_last = self.norm(h[:, -1, :])
        logit = self.cls_head(h_last)
        reg = self.reg_head(h_last)
        return logit.squeeze(-1), reg.squeeze(-1)

# -------------------------
# Train / Eval helpers
# -------------------------

def cosine_warmup(step, warmup, total_steps):
    if step < warmup:
        return step / max(1, warmup)
    progress = (step - warmup) / max(1, total_steps - warmup)
    return 0.5 * (1.0 + math.cos(math.pi * progress))

class SequenceScaler:
    def __init__(self):
        self.scaler = StandardScaler()
    def fit(self, X: np.ndarray):
        N, T, F = X.shape
        self.scaler.fit(X.reshape(N*T, F))
    def transform(self, X: np.ndarray) -> np.ndarray:
        N, T, F = X.shape
        Y = self.scaler.transform(X.reshape(N*T, F))
        return Y.reshape(N, T, F)


def train_epoch(model, loader, opt, sched, bce, mse, device):
    model.train()
    total, n = 0.0, 0
    for xb, yb_cls, yb_reg in loader:
        xb = xb.to(device)
        yb_cls = yb_cls.to(device).view(-1)
        yb_reg = yb_reg.to(device).view(-1)
        opt.zero_grad(set_to_none=True)
        logit, reg = model(xb)
        loss = bce(logit, yb_cls) + 0.5 * mse(reg, yb_reg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # cosine lr w/ warmup
        lr_scale = cosine_warmup(sched["step"], sched["warmup"], sched["total"]) 
        for g in opt.param_groups:
            g['lr'] = g.get('base_lr', g['lr']) * lr_scale
        opt.step()
        total += loss.item(); n += 1
        sched["step"] += 1
    return total / max(1, n)


def evaluate(model, loader, device):
    model.eval()
    logits, regs, yc, yr = [], [], [], []
    with torch.no_grad():
        for xb, yb_cls, yb_reg in loader:
            xb = xb.to(device)
            logit, reg = model(xb)
            logits.append(logit.cpu()); regs.append(reg.cpu())
            yc.append(yb_cls.view(-1)); yr.append(yb_reg.view(-1))
    logits = torch.cat(logits).numpy(); regs = torch.cat(regs).numpy()
    yc = torch.cat(yc).numpy(); yr = torch.cat(yr).numpy()
    prob = 1/(1+np.exp(-logits)); pred = (prob >= 0.5).astype(int)
    acc = float(accuracy_score(yc, pred)); f1 = float(f1_score(yc, pred))
    try:
        auc = float(roc_auc_score(yc, prob))
    except Exception:
        auc = float('nan')
    mse_ = float(mean_squared_error(yr, regs)); mae_ = float(mean_absolute_error(yr, regs))
    return {"acc":acc, "f1":f1, "auc":auc, "mse":mse_, "mae":mae_,
            "prob":prob, "pred_cls":pred, "pred_reg":regs, "y_cls":yc, "y_reg":yr}

# -------------------------
# Backtester
# -------------------------

def interval_to_bars_per_day(interval: str) -> int:
    if interval.endswith('d'):
        return 1
    if interval in {"15m", "15min"}:
        return 25  # ~375 minutes / 15
    if interval in {"5m", "5min"}:
        return 75
    if interval in {"1h", "60m"}:
        return 6.25  # approx; will floor where needed
    return 25


def backtest_on_test_set(aligned_df: pd.DataFrame, test_idx_slice: slice, preds: dict, cfg: Config, run_dir: str):
    """One‑bar ahead hold: enter at close[t], exit at close[t+1]."""
    # aligned_df aligns with the supervised windows (index length = total samples)
    # Split indices
    n_total = len(aligned_df)
    start = test_idx_slice.start
    end = test_idx_slice.stop  # exclusive

    closes = aligned_df["Close"].values.astype(float)
    idx = aligned_df.index

    prob = preds["prob"][start:end]
    next_ret_true = preds["y_reg"][start:end]   # realized next % change

    capital = cfg.initial_capital
    equity = []
    rows = []

    notional = capital * cfg.position_fraction
    roundtrip_cost = (cfg.cost_bps + cfg.slippage_bps) / 10_000.0

    for i in range(start, end-1):  # need i+1 for exit
        ts = idx[i]
        p_up = float(prob[i-start])
        # Signal
        if p_up >= cfg.long_threshold:
            side = 1
        elif p_up <= cfg.short_threshold:
            side = -1
        else:
            side = 0
        # Realized one‑bar return
        r = (closes[i+1] / closes[i]) - 1.0
        trade_ret = side * r
        # Apply costs only if we actually traded (non‑flat)
        cost = notional * roundtrip_cost if side != 0 else 0.0
        pnl = notional * trade_ret - cost
        capital += pnl
        equity.append([ts, capital])
        rows.append({
            "time": ts, "prob_up": p_up, "signal": side,
            "ret_next_bar": r, "trade_ret": trade_ret,
            "pnl": pnl, "equity": capital
        })

    eq_df = pd.DataFrame(equity, columns=["time","equity"]).set_index("time")
    tr_df = pd.DataFrame(rows)

    # Metrics
    rets = tr_df["trade_ret"].fillna(0.0).values
    bars_per_day = interval_to_bars_per_day(CFG.interval)
    bars_per_year = int(252 * bars_per_day)
    # Equity returns based on capital changes
    eq_returns = eq_df["equity"].pct_change().fillna(0.0).values
    cum_return = float((eq_df["equity"].iloc[-1] / cfg.initial_capital) - 1.0)
    avg = float(np.mean(eq_returns))
    std = float(np.std(eq_returns) + 1e-12)
    sharpe = float((avg / std) * np.sqrt(bars_per_year)) if std > 0 else float('nan')
    max_dd = 0.0
    peak = -1e18
    for v in eq_df["equity"].values:
        peak = max(peak, v)
        max_dd = min(max_dd, (v/peak)-1.0)
    stats = {
        "final_equity": float(eq_df["equity"].iloc[-1]),
        "total_return_pct": 100*cum_return,
        "max_drawdown_pct": 100*max_dd,
        "sharpe": sharpe,
        "trades": int((tr_df["signal"]!=0).sum())
    }

    eq_df.to_csv(os.path.join(run_dir, "equity_curve.csv"))
    tr_df.to_csv(os.path.join(run_dir, "trades.csv"), index=False)
    with open(os.path.join(run_dir, "bt_stats.json"), "w") as f:
        json.dump(stats, f, indent=2)
    return stats

# -------------------------
# Main pipeline
# -------------------------

def main(cfg: Config):
    ts = dt.datetime.now().strftime("%Y-%m-%d/%H%M%S")
    run_dir = os.path.join(cfg.out_dir, ts + f"_{cfg.symbol}_{cfg.interval}")
    os.makedirs(run_dir, exist_ok=True)

    # 1) Data & features
    raw = load_ohlcv(cfg.symbol, cfg.interval, cfg.period)
    df = engineer_features(raw)

    # 2) Supervised windows
    X, y_cls, y_reg, aligned = make_supervised(df, cfg.seq_len, cfg.pred_horizon)

    # 3) Chrono split
    n = len(X)
    n_test = int(n * cfg.test_ratio)
    n_val  = int(n * cfg.val_ratio)
    n_train = n - n_val - n_test
    assert n_train > 100, "Not enough samples after split; reduce seq_len or increase period."

    X_tr, X_va, X_te = X[:n_train], X[n_train:n_train+n_val], X[n_train+n_val:]
    yc_tr, yc_va, yc_te = y_cls[:n_train], y_cls[n_train:n_train+n_val], y_cls[n_train+n_val:]
    yr_tr, yr_va, yr_te = y_reg[:n_train], y_reg[n_train:n_train+n_val], y_reg[n_train+n_val:]

    # 4) Scale using TRAIN only
    scaler = SequenceScaler(); scaler.fit(X_tr)
    X_tr = scaler.transform(X_tr); X_va = scaler.transform(X_va); X_te = scaler.transform(X_te)

    # 5) Loaders
    train_ds = CandleDataset(X_tr, yc_tr, yr_tr)
    val_ds   = CandleDataset(X_va, yc_va, yr_va)
    test_ds  = CandleDataset(X_te, yc_te, yr_te)
    train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True, drop_last=True)
    val_loader   = DataLoader(val_ds, batch_size=cfg.batch_size, shuffle=False)
    test_loader  = DataLoader(test_ds, batch_size=cfg.batch_size, shuffle=False)

    # 6) Model
    in_dim = X.shape[-1]
    model = TradeGPT(in_dim, cfg.d_model, cfg.nhead, cfg.num_layers, cfg.dim_feedforward, cfg.dropout).to(cfg.device)

    # 7) Loss & optim
    bce = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(cfg.bce_pos_weight, device=cfg.device))
    mse = nn.MSELoss()
    opt = torch.optim.AdamW(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
    for g in opt.param_groups: g['base_lr'] = cfg.lr
    total_steps = cfg.epochs * max(1, len(train_loader))
    sched = {"step": 0, "warmup": cfg.warmup_steps, "total": total_steps}

    # 8) Train with early stop on val F1
    best_f1, best_state, no_imp, patience = -1.0, None, 0, 5
    for ep in range(1, cfg.epochs+1):
        tr_loss = train_epoch(model, train_loader, opt, sched, bce, mse, cfg.device)
        val = evaluate(model, val_loader, cfg.device)
        print(f"Epoch {ep:02d} | train_loss={tr_loss:.4f} | val_acc={val['acc']:.4f} | val_f1={val['f1']:.4f} | val_auc={val['auc']:.4f} | val_mse={val['mse']:.6f}")
        if val['f1'] > best_f1:
            best_f1 = val['f1']; best_state = {k:v.cpu() for k,v in model.state_dict().items()}; no_imp = 0
        else:
            no_imp += 1
            if no_imp >= patience:
                print("Early stopping on val F1."); break

    if best_state is not None:
        model.load_state_dict(best_state)

    # 9) Test metrics
    test = evaluate(model, test_loader, cfg.device)
    print("Test:", json.dumps({k: round(test[k],6) for k in ['acc','f1','auc','mse','mae']}, indent=2))

    # 10) Save artifacts
    meta = {
        "symbol": cfg.symbol, "interval": cfg.interval, "period": cfg.period,
        "seq_len": cfg.seq_len, "pred_horizon": cfg.pred_horizon,
        "metrics": {k: float(test[k]) if isinstance(test[k], float) else test[k] for k in ["acc","f1","auc","mse","mae"]}
    }
    os.makedirs(run_dir, exist_ok=True)
    with open(os.path.join(run_dir, "metrics.json"), "w") as f: json.dump(meta, f, indent=2)
    torch.save({"state_dict": model.state_dict(), "scaler_mean": scaler.scaler.mean_.tolist(), "scaler_scale": scaler.scaler.scale_.tolist()}, os.path.join(run_dir, "model.pt"))

    # 11) Inference (latest window)
    latest = X[-1:]
    latest_scaled = scaler.transform(latest)
    model.eval();
    with torch.no_grad():
        xb = torch.from_numpy(latest_scaled).to(cfg.device)
        logit, reg = model(xb)
        prob_up = float(torch.sigmoid(logit).cpu().item())
        next_pct = float(reg.cpu().item())
    last_close = float(df["Close"].iloc[-1])
    next_close_pred = last_close * (1.0 + next_pct)
    inf = {"prob_next_close_up": prob_up, "predicted_next_close": next_close_pred, "last_close": last_close, "predicted_next_close_pct_change": next_pct, "timestamp_last": str(df.index[-1])}
    with open(os.path.join(run_dir, "inference.json"), "w") as f: json.dump(inf, f, indent=2)

    # 12) Backtest on the test slice
    # Map the test slice into the aligned index space
    start = n - (n_test + n_val)
    test_slice = slice(n_train + n_val - n_train, n)  # relative slice for preds, absolute below
    # But our evaluate(test_loader) returned arrays matching X_te order; we need absolute indices:
    # Absolute start index within aligned df for test set
    abs_start = n_train + n_val
    abs_end = n
    preds_all = {
        "prob": np.concatenate([evaluate(model, DataLoader(CandleDataset(X_tr, yc_tr, yr_tr), batch_size=cfg.batch_size, shuffle=False), cfg.device)["prob"],
                                 evaluate(model, val_loader, cfg.device)["prob"],
                                 test["prob"]]),
        "y_reg": np.concatenate([evaluate(model, DataLoader(CandleDataset(X_tr, yc_tr, yr_tr), batch_size=cfg.batch_size, shuffle=False), cfg.device)["y_reg"],
                                  evaluate(model, val_loader, cfg.device)["y_reg"],
                                  test["y_reg"]])
    }
    bt_stats = backtest_on_test_set(aligned, slice(abs_start, abs_end), preds_all, cfg, run_dir)
    print("Backtest stats:", json.dumps(bt_stats, indent=2))

    print(f"Artifacts saved to: {run_dir}")


if __name__ == "__main__":
    main(CFG)


  df = yf.download(symbol, interval=interval, period=period, progress=False)
  out["vol_z"] = (out["Volume"].replace(0, np.nan).rolling(96).apply(lambda x: (x[-1]-np.nanmean(x))/ (np.nanstd(x)+1e-9), raw=False))


Epoch 01 | train_loss=0.9667 | val_acc=0.5070 | val_f1=0.0000 | val_auc=0.5317 | val_mse=0.540099
Epoch 02 | train_loss=0.9490 | val_acc=0.5070 | val_f1=0.0000 | val_auc=0.5294 | val_mse=0.484023
Epoch 03 | train_loss=0.9486 | val_acc=0.5070 | val_f1=0.0000 | val_auc=0.5286 | val_mse=0.392227
Epoch 04 | train_loss=0.8893 | val_acc=0.5070 | val_f1=0.0000 | val_auc=0.5238 | val_mse=0.281004
Epoch 05 | train_loss=0.8581 | val_acc=0.4789 | val_f1=0.0000 | val_auc=0.5230 | val_mse=0.171366
Epoch 06 | train_loss=0.8377 | val_acc=0.4789 | val_f1=0.0000 | val_auc=0.5167 | val_mse=0.090743
Early stopping on val F1.
Test: {
  "acc": 0.450704,
  "f1": 0.0,
  "auc": 0.370994,
  "mse": 0.07062,
  "mae": 0.220208
}
Backtest stats: {
  "final_equity": 73314.69910616174,
  "total_return_pct": -26.68530089383826,
  "max_drawdown_pct": -29.49551669723568,
  "sharpe": -3.2812994309188803,
  "trades": 66
}
Artifacts saved to: outputs/2025-11-03/011727_RELIANCE.NS_1d
