# BTC Turning-Point Prediction Pipeline

**Notebook purpose:** a complete, runnable skeleton to train probabilistic turning-point models
using historical Binance Futures-derived 15m candles and related endpoints. The notebook contains:

- Data ingestion placeholders (replace with paths to your CSVs)
- Volatility-adaptive triple-barrier labeling
- Feature engineering examples
- LightGBM baseline training + calibration
- PyTorch Transformer encoder model skeleton and training loop
- Saving and inference stubs

**Notes:** This notebook is a starting point. Replace file paths and expand feature engineering as needed.


In [None]:
import os, gc, pickle, math, time
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, brier_score_loss
import lightgbm as lgb
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
print('pandas', pd.__version__, 'numpy', np.__version__, 'torch', torch.__version__)

## 1) Data loading & preprocessing

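Replace the placeholder path with a 15m klines CSV exported from Binance. The cell below loads that file (it expects an `open_time` timestamp column) and, when the file is missing, falls back to a synthetic random-walk series so the rest of the notebook still runs. Merging trades/aggTrades data is not implemented here; add it alongside the klines load.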


In [None]:
# Load 15m klines from CSV, or fall back to a synthetic random walk so the
# rest of the notebook can run end-to-end without real data.
KLINES_CSV = '/mnt/data/klines_15m_sample.csv'  # replace with your real file
SYNTHETIC_SEED = 42  # fixed seed so the synthetic fallback is reproducible

if os.path.exists(KLINES_CSV):
    klines = pd.read_csv(KLINES_CSV, parse_dates=['open_time'])
    klines.set_index('open_time', inplace=True)
    klines = klines.sort_index()
    print('Loaded klines', klines.shape)
else:
    # A local generator keeps the global NumPy random state untouched.
    rng = np.random.default_rng(SYNTHETIC_SEED)
    # '15min' replaces the deprecated '15T' offset alias.
    dates = pd.date_range('2025-01-01', periods=2000, freq='15min', tz='UTC')
    n_bars = len(dates)
    price = 50000 + np.cumsum(rng.standard_normal(n_bars) * 50)
    klines = pd.DataFrame(index=dates)
    klines['open'] = price + rng.standard_normal(n_bars) * 5
    klines['close'] = price + rng.standard_normal(n_bars) * 5
    # Wicks extend from the candle body, so every bar satisfies
    # low <= min(open, close) and high >= max(open, close).
    body_top = klines[['open', 'close']].max(axis=1)
    body_bottom = klines[['open', 'close']].min(axis=1)
    klines['high'] = body_top + np.abs(rng.standard_normal(n_bars) * 20)
    klines['low'] = body_bottom - np.abs(rng.standard_normal(n_bars) * 20)
    klines['volume'] = np.abs(rng.standard_normal(n_bars) * 10)
    # Keep the conventional OHLCV column order.
    klines = klines[['open', 'high', 'low', 'close', 'volume']]
    print('Created synthetic klines', klines.shape)

# Normalise the index to timezone-aware UTC whichever branch produced it.
if klines.index.tz is None:
    klines = klines.tz_localize('UTC')
else:
    klines = klines.tz_convert('UTC')
klines.head()

## 2) Triple-barrier labeling (volatility-adaptive)

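This labels each bar as UP (1), DOWN (0), or NEUTRAL (2) based on ATR-scaled upper/lower barriers and a time horizon of `n` bars: a bar is UP if the upper barrier is touched first within the next `n` bars, DOWN if the lower barrier is touched first, and NEUTRAL otherwise.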


In [None]:
def compute_atr(df, atr_len=14):
    """Return an exponentially smoothed Average True Range series.

    The true range of a bar is the largest of ``|high - low|``,
    ``|high - previous close|`` and ``|low - previous close|``. The first
    bar has no previous close, so only ``|high - low|`` contributes there
    (the row-wise max skips NaN).

    Args:
        df: frame with ``high``, ``low`` and ``close`` columns.
        atr_len: span of the exponential moving average (``adjust=False``).

    Returns:
        Series aligned with ``df.index`` holding the smoothed true range.
    """
    prev_close = df['close'].shift()
    candidates = pd.concat(
        [
            (df['high'] - df['low']).abs(),
            (df['high'] - prev_close).abs(),
            (df['low'] - prev_close).abs(),
        ],
        axis=1,
    )
    true_range = candidates.max(axis=1)
    return true_range.ewm(span=atr_len, adjust=False).mean()

def triple_barrier_labels(df, n=16, k_up=1.5, k_dn=1.5, atr_len=14, drop_na=True):
    """Label each bar with a volatility-adaptive triple-barrier scheme.

    For bar ``i`` the upper barrier is ``close * (1 + k_up * atr / close)``
    and the lower barrier is ``close * (1 - k_dn * atr / close)``. The highs
    and lows of the next ``n`` bars are scanned; whichever barrier is touched
    first decides the label. If neither is touched, or both are first touched
    in the same bar, the bar is NEUTRAL.

    Args:
        df: frame with ``high``, ``low`` and ``close`` columns.
        n: look-ahead horizon in bars.
        k_up: ATR multiple for the upper barrier.
        k_dn: ATR multiple for the lower barrier.
        atr_len: EMA span passed to ``compute_atr``.
        drop_na: when true, the last ``n`` bars (which lack a full look-ahead
            window) are forced to NEUTRAL. No rows are removed.

    Returns:
        ``int8`` Series named ``label`` on ``df.index``:
        1 = UP, 0 = DOWN, 2 = NEUTRAL.
    """
    atr = compute_atr(df, atr_len=atr_len)
    close = df['close']
    # .bfill() replaces the deprecated fillna(method='bfill').
    up_mult = (k_up * atr / close).bfill().to_numpy()
    dn_mult = (k_dn * atr / close).bfill().to_numpy()
    price = close.to_numpy()
    highs = df['high'].to_numpy()
    lows = df['low'].to_numpy()

    n_rows = len(df)
    y = np.full(n_rows, 2, dtype=np.int8)  # default: NEUTRAL
    for i in range(n_rows - n):
        p0 = price[i]
        up = p0 * (1 + up_mult[i])
        dn = p0 * (1 - dn_mult[i])
        up_hits = np.flatnonzero(highs[i + 1:i + n + 1] >= up)
        dn_hits = np.flatnonzero(lows[i + 1:i + n + 1] <= dn)
        # Offsets inside the window are < n, so n works as "never touched".
        first_up = up_hits[0] if up_hits.size else n
        first_dn = dn_hits[0] if dn_hits.size else n
        if first_up < first_dn:
            y[i] = 1
        elif first_dn < first_up:
            y[i] = 0
        # A tie or no touch keeps the NEUTRAL default.

    labels = pd.Series(y, index=df.index, name='label')
    # Guard n > 0: iloc[-0:] would select the whole series.
    if drop_na and n > 0:
        labels.iloc[-n:] = 2
    return labels

# Label every bar using a 16-bar horizon and symmetric 1.5x ATR barriers,
# then show the class balance and the first 20 labels.
labels = triple_barrier_labels(
    klines,
    n=16,
    k_up=1.5,
    k_dn=1.5,
    atr_len=14,
)
print(labels.value_counts())
labels.iloc[:20]

## 3) Feature engineering (examples)

Create a set of rolling and technical features. Expand these with trade/aggtrades/bookdepth features from your CSVs.


In [None]:
def make_features(df):
    """Build the example feature matrix from OHLCV bars.

    Columns produced, in order: ``close``, ``ret_{1,2,4,8,16,32}``
    (percentage returns), ``atr14``, ``rv_24`` (rolling std of log returns
    over 96 bars), ``ema_8``, ``ema_21``, ``ema_diff``, ``rsi14``,
    ``mom_12``, ``vol``, ``vol_24`` (rolling 96-bar median volume) and
    ``vol_surge`` (volume relative to that median).

    Args:
        df: frame with ``high``, ``low``, ``close`` and ``volume`` columns.

    Returns:
        DataFrame on ``df.index`` with no missing values.
    """
    close = df['close']
    X = pd.DataFrame(index=df.index)
    X['close'] = close

    # Returns over several horizons; lag 1 yields ret_1.
    for lag in (1, 2, 4, 8, 16, 32):
        X[f'ret_{lag}'] = close.pct_change(lag)

    X['atr14'] = compute_atr(df, atr_len=14)
    X['rv_24'] = np.log(close).diff().rolling(window=96).std()

    X['ema_8'] = close.ewm(span=8).mean()
    X['ema_21'] = close.ewm(span=21).mean()
    X['ema_diff'] = X['ema_8'] - X['ema_21']

    # RSI from exponentially smoothed gains and losses; the epsilon avoids
    # division by zero when there are no losses.
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -1 * delta.clip(upper=0)
    rs = gains.ewm(span=14).mean() / (losses.ewm(span=14).mean() + 1e-12)
    X['rsi14'] = 100 - 100 / (1 + rs)

    X['mom_12'] = close / close.shift(12) - 1

    X['vol'] = df['volume']
    # .bfill() replaces the deprecated fillna(method='bfill').
    X['vol_24'] = X['vol'].rolling(window=96).median().bfill()
    X['vol_surge'] = X['vol'] / (X['vol_24'] + 1e-9)

    # NOTE: back-filling copies the first valid value into the warm-up rows,
    # so those rows carry information from later bars. Consider dropping the
    # warm-up rows before training on real data.
    return X.bfill().fillna(0)

# Build the feature matrix for every bar and preview its first rows.
X = make_features(df=klines)
X.head(n=5)

## 4) Train/test split

Using your specified date ranges: Train 2025-02-06 → 2025-06-06, Backtest 2025-06-07 → 2025-08-25. Adjust if your data spans different years.


In [None]:
# Date windows for the chronological split (inclusive on both ends).
train_start = pd.Timestamp('2025-02-06', tz='UTC')
train_end   = pd.Timestamp('2025-06-06', tz='UTC')
test_start  = pd.Timestamp('2025-06-07', tz='UTC')
test_end    = pd.Timestamp('2025-08-25', tz='UTC')

train_mask = (X.index >= train_start) & (X.index <= train_end)
test_mask  = (X.index >= test_start) & (X.index <= test_end)

MIN_TRAIN_ROWS = 50

# Fall back to a chronological 70/30 split when the configured windows do not
# cover the data: either too few training rows, or no test rows at all (an
# empty test split would break the evaluation cells further down).
if train_mask.sum() < MIN_TRAIN_ROWS or test_mask.sum() == 0:
    print('Warning: train/test date masks empty or too small for this data. Using first 70% for train.')
    split = int(len(X) * 0.7)
    X_train, X_test = X.iloc[:split], X.iloc[split:]
    y_train, y_test = labels.iloc[:split], labels.iloc[split:]
else:
    X_train, X_test = X.loc[train_mask], X.loc[test_mask]
    y_train, y_test = labels.loc[train_mask], labels.loc[test_mask]

# NOTE: labels are computed from bars that follow each row, so the last rows
# of the train split can overlap the start of the test split. Consider
# dropping a gap of rows between the two when evaluating on real data.
print('Train rows:', X_train.shape[0], 'Test rows:', X_test.shape[0])
X_train.shape, X_test.shape

## 5) LightGBM baseline (multiclass + calibration)

Train a LightGBM multiclass classifier and calibrate with isotonic regression.


In [None]:
from sklearn.preprocessing import LabelEncoder

# Label coding used throughout the notebook: 0 = DOWN, 1 = UP, 2 = NEUTRAL.
DOWN_LABEL, UP_LABEL, NEUTRAL_LABEL = 0, 1, 2

# Fit the encoder on the fixed label set rather than on y_train. Fitting on
# the data would renumber the classes whenever one is missing from the train
# window, and the `!= 2` / `== 1` comparisons below would then address the
# wrong class. It also keeps transform() from failing on a class that only
# appears in the test split.
le = LabelEncoder()
le.fit([DOWN_LABEL, UP_LABEL, NEUTRAL_LABEL])
y_train_enc = le.transform(y_train.values)
y_test_enc = le.transform(y_test.values)

keep_neutral = True

if keep_neutral:
    X_train_b, y_train_b = X_train, y_train_enc
    X_test_b, y_test_b = X_test, y_test_enc
else:
    # Binary UP-vs-DOWN problem: drop the NEUTRAL rows from both splits.
    mask_tr = y_train_enc != NEUTRAL_LABEL
    mask_te = y_test_enc != NEUTRAL_LABEL
    X_train_b, y_train_b = X_train.loc[mask_tr], y_train_enc[mask_tr]
    X_test_b, y_test_b = X_test.loc[mask_te], y_test_enc[mask_te]

print('Classes:', np.unique(y_train_b))

params = {
    'objective': 'multiclass' if keep_neutral else 'binary',
    'learning_rate': 0.03,
    'n_estimators': 1000,
    'num_leaves': 64,
    'subsample': 0.8,
    'subsample_freq': 1,  # row subsampling is only active when the frequency is non-zero
    'colsample_bytree': 0.8,
    'reg_alpha': 1.0,
    'reg_lambda': 1.0,
    'random_state': 42,
    'n_jobs': -1,
}
if keep_neutral:
    params['num_class'] = 3  # only meaningful for the multiclass objective

dtrain = lgb.Dataset(X_train_b, label=y_train_b)
dval = lgb.Dataset(X_test_b, label=y_test_b, reference=dtrain)
# Early stopping and logging are passed as callbacks; the former
# `early_stopping_rounds` / `verbose_eval` keyword arguments are no longer
# accepted by recent LightGBM releases.
# NOTE: early stopping here monitors the test split, so the chosen number of
# rounds is tuned on test data. Use a separate validation slice for reporting.
bst = lgb.train(
    params,
    dtrain,
    valid_sets=[dtrain, dval],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=50),
    ],
)

# scikit-learn wrapper with the same hyperparameters, wrapped in an isotonic
# calibrator fitted with 3-fold cross-validation on the training data.
SKLEARN_PARAM_KEYS = (
    'learning_rate', 'n_estimators', 'num_leaves', 'subsample', 'subsample_freq',
    'colsample_bytree', 'reg_alpha', 'reg_lambda', 'random_state', 'n_jobs',
)
clf = lgb.LGBMClassifier(**{k: params[k] for k in SKLEARN_PARAM_KEYS})
clf.fit(X_train_b, y_train_b)
calibrator = CalibratedClassifierCV(clf, method='isotonic', cv=3)
calibrator.fit(X_train_b, y_train_b)

probs = calibrator.predict_proba(X_test_b)
# Look columns up by class label: a class missing from the training data has
# no column, so positional indexing would silently pick the wrong class.
class_to_col = {int(c): j for j, c in enumerate(calibrator.classes_)}


def _class_proba(label):
    """Return the probability column for `label`, or zeros if the class is absent."""
    col = class_to_col.get(label)
    return probs[:, col] if col is not None else np.zeros(len(probs))


proba_down = _class_proba(DOWN_LABEL)
proba_up = _class_proba(UP_LABEL)
proba_neu = _class_proba(NEUTRAL_LABEL)

is_up = (y_test_b == UP_LABEL).astype(int)
print('Test Brier for UP class:', brier_score_loss(is_up, proba_up))
# ROC AUC is undefined when the test split holds only one of the two outcomes.
if 0 < is_up.sum() < len(is_up):
    print('Test ROC AUC (one-vs-rest) for UP:', roc_auc_score(is_up, proba_up))
else:
    print('Test ROC AUC (one-vs-rest) for UP: undefined (single class in test split)')

## 6) Transformer model skeleton (PyTorch)

This builds a small Transformer encoder to ingest sequence windows and output class probabilities. You can train it on many years by batching windows.


In [None]:
class SeqDataset(Dataset):
    """Sliding-window dataset over a feature frame and a label series.

    Item ``i`` is the window of rows ``[i, i + window)`` as a float32 tensor
    of shape ``(window, n_features)``, paired with the label of the last row
    in that window as a ``torch.long`` scalar.

    Args:
        X_df: feature DataFrame, one row per bar.
        y_ser: label Series positionally aligned with ``X_df``.
        window: number of consecutive rows per sample.
        features: columns to use; defaults to all columns of ``X_df``.
    """

    def __init__(self, X_df, y_ser, window=128, features=None):
        self.X = X_df
        self.y = y_ser
        self.idx = np.arange(len(self.X))
        self.window = window
        self.features = X_df.columns.tolist() if features is None else features
        # Convert once up front; slicing a DataFrame and converting dtypes on
        # every __getitem__ call dominates the cost of a training epoch.
        self._x = X_df[self.features].to_numpy(dtype='float32', copy=True)
        self._y = np.asarray(y_ser, dtype='int64')

    def __len__(self):
        # A series of length L holds L - window + 1 complete windows; the
        # "+ 1" keeps the window that ends on the most recent row.
        return max(0, len(self._x) - self.window + 1)

    def __getitem__(self, i):
        n_items = len(self)
        if i < 0:
            i += n_items
        if not 0 <= i < n_items:
            raise IndexError(f'index {i} out of range for {n_items} windows')
        end = i + self.window
        x = torch.from_numpy(self._x[i:end])
        y = torch.tensor(self._y[end - 1], dtype=torch.long)
        return x, y

class TimeTransformer(nn.Module):
    """Small Transformer encoder that classifies a window of feature rows.

    Each timestep is projected to ``d_model``, a sinusoidal positional
    encoding is added, the sequence goes through a stack of encoder layers,
    the outputs are averaged over time and a LayerNorm + Linear head produces
    the class logits.

    Self-attention followed by mean pooling cannot see the order of the
    timesteps on its own, so the positional encoding is what makes the model
    order-aware. It is computed on the fly and holds no parameters or
    buffers, so ``state_dict`` contains only ``input_proj``, ``transformer``
    and ``fc`` entries.

    Args:
        n_features: number of input features per timestep.
        d_model: embedding width.
        n_heads: attention heads per encoder layer.
        num_layers: number of encoder layers.
        num_classes: number of output logits.
        dropout: dropout rate inside the encoder layers.
    """

    def __init__(self, n_features, d_model=64, n_heads=4, num_layers=2, num_classes=3, dropout=0.1):
        super().__init__()
        self.input_proj = nn.Linear(n_features, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation='gelu',
            batch_first=True,  # inputs stay (batch, seq, feature); no permutes needed
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, num_classes))

    @staticmethod
    def _sinusoidal_positions(seq_len, d_model, device, dtype):
        """Return a (seq_len, d_model) table of sine/cosine position codes."""
        positions = torch.arange(seq_len, device=device, dtype=torch.float32).unsqueeze(1)
        freqs = torch.exp(
            torch.arange(0, d_model, 2, device=device, dtype=torch.float32)
            * (-math.log(10000.0) / d_model)
        )
        angles = positions * freqs
        table = torch.zeros(seq_len, d_model, device=device, dtype=torch.float32)
        table[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine column than sine columns.
        table[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        return table.to(dtype)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, n_features) to (batch, num_classes) logits."""
        x = self.input_proj(x)
        x = x + self._sinusoidal_positions(x.size(1), x.size(2), x.device, x.dtype)
        x = self.transformer(x)
        pooled = x.mean(dim=1)  # average over the time axis
        return self.fc(pooled)

# Smoke test: push one shuffled batch of 64-bar windows through an untrained
# model and print the shape of the logits.
features = list(X.columns)
window = 64
ds = SeqDataset(
    X.fillna(0),
    labels.fillna(2).astype(int),
    window=window,
    features=features,
)
if len(ds):
    loader = DataLoader(ds, batch_size=32, shuffle=True)
    model = TimeTransformer(
        n_features=len(features),
        d_model=64,
        n_heads=4,
        num_layers=2,
        num_classes=3,
    )
    xb, yb = next(iter(loader))
    logits = model(xb)
    print('model output shape', logits.shape)

In [None]:
def train_transformer(model, train_ds, val_ds, epochs=5, lr=1e-4, device='cpu',
                      save_path='/mnt/data/transformer_best.pth',
                      batch_size=64, val_batch_size=128):
    """Train ``model`` with Adam and cross-entropy, checkpointing the best epoch.

    After every epoch the validation loss is computed; whenever it improves,
    the model's ``state_dict`` is written to ``save_path``.

    Args:
        model: module mapping a batch of inputs to class logits.
        train_ds: training dataset yielding ``(inputs, label)`` pairs.
        val_ds: validation dataset yielding ``(inputs, label)`` pairs.
        epochs: number of passes over ``train_ds``.
        lr: Adam learning rate.
        device: device the model and batches are moved to.
        save_path: file that receives the best ``state_dict``; its directory
            is created if it does not exist.
        batch_size: training batch size (incomplete last batch is dropped).
        val_batch_size: validation batch size.

    Returns:
        The same ``model`` object, holding the weights of the last epoch
        (not necessarily the checkpointed best epoch).
    """
    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    crit = nn.CrossEntropyLoss()
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False)

    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    best_loss = float('inf')
    for ep in range(epochs):
        model.train()
        loss_sum, n_batches = 0.0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            loss = crit(model(xb), yb)
            loss.backward()
            opt.step()
            loss_sum += loss.item()
            n_batches += 1
        # drop_last=True yields no batches when train_ds is smaller than one
        # batch; report NaN instead of dividing by zero.
        train_loss = loss_sum / n_batches if n_batches else float('nan')

        model.eval()
        vloss_sum, n_val = 0.0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                # Weight by batch size so a short final batch does not count
                # as much as a full one.
                vloss_sum += crit(model(xb), yb).item() * yb.size(0)
                n_val += yb.size(0)
        vloss = vloss_sum / max(1, n_val)

        print(f'EP {ep+1}/{epochs} train_loss={train_loss:.4f} val_loss={vloss:.4f}')
        if vloss < best_loss:
            best_loss = vloss
            torch.save(model.state_dict(), save_path)
    return model

# Chronological 70/30 split for the Transformer: earlier bars train, later
# bars validate.
n_total = len(X)
split_idx = int(n_total * 0.7)
train_ds = SeqDataset(
    X.iloc[:split_idx].fillna(0),
    labels.iloc[:split_idx].fillna(2).astype(int),
    window=window,
    features=features,
)
# Validation features and labels must come from the same slice; pairing the
# validation rows with labels.iloc[:split_idx] would score the model against
# the training period's labels.
val_ds = SeqDataset(
    X.iloc[split_idx:].fillna(0),
    labels.iloc[split_idx:].fillna(2).astype(int),
    window=window,
    features=features,
)
if len(train_ds) > 0 and len(val_ds) > 0:
    model = TimeTransformer(n_features=len(features), d_model=64, n_heads=4, num_layers=2, num_classes=3)
    trained = train_transformer(model, train_ds, val_ds, epochs=2, lr=3e-4, device='cpu')

## 7) Save models & inference stub

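Save the LightGBM and Transformer artifacts, and provide a `predict_live()` function that, given the latest feature row and a recent feature sequence, returns ensembled UP/DOWN/NEUTRAL probabilities (the calibrated LightGBM output combined with the Transformer's softmax output).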


In [None]:
# Persist the fitted LightGBM classifier and its calibrator as pickles.
for artifact_path, artifact in (
    ('/mnt/data/lgbm_model.pkl', clf),
    ('/mnt/data/calibrator.pkl', calibrator),
):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)
print('Saved LightGBM and calibrator.')


def predict_live(latest_df_row, recent_seq_df, feature_list, device='cpu'):
    """Return ensembled UP/DOWN/NEUTRAL probabilities for the latest bar.

    The calibrated LightGBM probabilities for ``latest_df_row`` and the
    Transformer softmax probabilities for ``recent_seq_df`` are combined with
    a normalised geometric mean (average of log-probabilities).

    Args:
        latest_df_row: Series (or one-row frame) holding the latest features.
        recent_seq_df: DataFrame of the most recent feature rows, oldest first.
        feature_list: feature column names, in training order.
        device: device on which the Transformer runs.

    Returns:
        Dict with float values under ``proba_up``, ``proba_down`` and
        ``proba_neutral``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load artifacts that
    # this pipeline wrote itself.
    with open('/mnt/data/calibrator.pkl', 'rb') as f:
        cal = pickle.load(f)

    # Pass a named-column frame so the estimator, which was fitted on a
    # DataFrame, can match features by name instead of warning about it.
    row_values = np.asarray(latest_df_row[feature_list], dtype='float64').reshape(1, -1)
    Xr = pd.DataFrame(row_values, columns=list(feature_list))
    # Scatter by class label (0=DOWN, 1=UP, 2=NEUTRAL) so the vector always
    # has three entries, even if the calibrator was fitted without a class.
    proba_lgb = np.zeros(3)
    for cls, p in zip(cal.classes_, cal.predict_proba(Xr)[0]):
        proba_lgb[int(cls)] = p

    model = TimeTransformer(n_features=len(feature_list), d_model=64, n_heads=4, num_layers=2, num_classes=3)
    model.load_state_dict(torch.load('/mnt/data/transformer_best.pth', map_location=device))
    model.to(device)
    model.eval()
    seq = torch.from_numpy(recent_seq_df[feature_list].values.astype('float32')).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(seq)
        proba_tr = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # Geometric-mean ensemble: average the log-probabilities, then renormalise.
    eps = 1e-9
    log_avg = (np.log(proba_lgb + eps) + np.log(proba_tr + eps)) / 2.0
    weights = np.exp(log_avg - log_avg.max())  # shift for numerical stability
    proba_ens = weights / weights.sum()
    return {'proba_up': float(proba_ens[1]), 'proba_down': float(proba_ens[0]), 'proba_neutral': float(proba_ens[2])}

# Example call on the most recent test rows. The sequence length matches the
# `window` the Transformer was trained with, so inference sees inputs of the
# same length as training (iloc[-window:] simply returns fewer rows when the
# test split is shorter).
if len(X_test) > 0:
    row = X_test.iloc[-1]
    seq_df = X_test.iloc[-window:]
    print('Example live prediction:', predict_live(row, seq_df, features))

Notebook saved to `/mnt/data/training_pipeline.ipynb`. You can download it from the link below.