# Model Development: LSTM for Order Book Forecasting

This notebook demonstrates:
1. Preparing training data from engineered features
2. Building and training LSTM models
3. Model evaluation and performance analysis
4. Comparing different architectures

**Objective**: Predict short-term price direction (up/down/flat) from order book microstructure.

In [None]:
# Imports
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

sys.path.append('..')

from src.features.feature_pipeline import FeaturePipeline, FeaturePipelineConfig, create_training_dataset
from src.models.lstm_model import OrderBookLSTM, AttentionLSTM, count_parameters

# Settings
SEED = 42

# Seed torch's global RNG so weight initialisation, dropout masks and
# DataLoader shuffling repeat on a fresh Restart & Run All.
# (Some GPU kernels can still be non-deterministic.)
torch.manual_seed(SEED)

plt.style.use('seaborn-v0_8-darkgrid')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")
print("✅ Libraries loaded")

## 1. Generate Data and Extract Features

In [None]:
# Generate synthetic order book data
def generate_order_book_data(n=2000, n_levels=20, seed=42):
    """Generate synthetic limit-order-book snapshots.

    The mid price follows a driftless Gaussian random walk (std 10 per
    snapshot) starting at 50000. Each snapshot holds `n_levels` bid and ask
    levels spaced 0.5 apart around the mid price, with volumes drawn around 50
    and skewed by a per-level random imbalance (floored at 1).

    Args:
        n: Number of snapshots to generate.
        n_levels: Number of price levels on each side of the book.
        seed: Value passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* random state, exactly as the original code did.

    Returns:
        DataFrame with columns ``timestamp`` (0..n-1), ``exchange``,
        ``symbol``, ``bids`` and ``asks``; ``bids``/``asks`` are lists of
        ``[price, volume]`` pairs ordered from the best level outwards.
        With ``n == 0`` the frame is empty but still has these columns.
    """
    columns = ['timestamp', 'exchange', 'symbol', 'bids', 'asks']

    np.random.seed(seed)
    snapshots = []
    mid_price = 50000.0

    for i in range(n):
        # Pure noise step: there is no drift/trend term in this walk.
        mid_price += np.random.normal(0, 10)

        bids = []
        asks = []

        for j in range(n_levels):
            offset = (j + 1) * 0.5
            bid_price = mid_price - offset
            ask_price = mid_price + offset

            # The same imbalance draw pushes bid and ask volume in opposite
            # directions; each side then gets its own independent noise.
            # The draw order (imbalance, bid noise, ask noise) is kept so the
            # default output matches earlier runs of this notebook.
            imbalance = np.random.normal(0, 10)
            bid_vol = max(1, 50 + imbalance + np.random.normal(0, 10))
            ask_vol = max(1, 50 - imbalance + np.random.normal(0, 10))

            bids.append([bid_price, bid_vol])
            asks.append([ask_price, ask_vol])

        snapshots.append({
            'timestamp': i,
            'exchange': 'binance',
            'symbol': 'BTCUSDT',
            'bids': bids,
            'asks': asks
        })

    # Explicit columns keep the schema stable even when no rows are generated.
    return pd.DataFrame(snapshots, columns=columns)

# Build the synthetic snapshot table that the feature pipeline consumes
N_SNAPSHOTS = 2000

print("Generating order book data...")
df = generate_order_book_data(n=N_SNAPSHOTS)
snapshot_count = len(df)
print(f"✅ Generated {snapshot_count:,} snapshots")

In [None]:
# Extract features
# Pipeline settings are collected in one place so they are easy to tweak.
feature_settings = {
    'ofi_levels': [1, 5],
    'ofi_windows': [10],
    'volatility_windows': [20],
    'ohlc_bar_size': 10,
}
config = FeaturePipelineConfig(**feature_settings)
pipeline = FeaturePipeline(config)

print("Extracting features (this may take a moment)...")
features_df = pipeline.compute_all_features(df, include_volatility=False)

n_feature_columns = len(features_df.columns)
print(f"✅ Features extracted: {n_feature_columns} columns")

## 2. Prepare Training Data

In [None]:
# Create training dataset
dataset = create_training_dataset(
    features_df,
    prediction_horizon=50,
    threshold_bps=5.0,
    sequence_length=100,
)

# Pull out the arrays and the column names the later cells rely on
X, y, feature_names = (dataset[key] for key in ('X', 'y', 'feature_names'))

print("\nDataset shape:")
print(f"  X: {X.shape} (samples, sequence_length, features)")
print(f"  y: {y.shape} (samples,)")

# Share of each label; labels are used directly as indices into label_names
print("\nClass distribution:")
label_names = ['Down', 'Flat', 'Up']
n_labelled = len(y)
labels_present, label_counts = np.unique(y, return_counts=True)
for label, n_label in zip(labels_present, label_counts):
    share = n_label / n_labelled * 100
    print(f"  {label_names[label]}: {n_label:,} ({share:.1f}%)")

In [None]:
# Train/val/test split (chronological, with a purge gap before each boundary)
#
# Every sample is a sequence of consecutive rows, so samples that sit next to
# each other presumably share most of their rows. A shuffled split would then
# scatter near-duplicate sequences across train/val/test and inflate the
# validation and test scores. Splitting by position keeps the evaluation data
# strictly later than the training data.
# NOTE(review): assumes create_training_dataset returns samples in time order
# with a stride of one row - confirm against its implementation.
# Trade-off: the split is no longer stratified, so class shares can differ
# between segments.
TEST_FRACTION = 0.2   # share of all samples held out for testing
VAL_FRACTION = 0.2    # share of the remaining samples used for validation
# Drop one sequence length of samples before each boundary so sequences on
# either side do not share rows. The labels also look ahead by the prediction
# horizon; widen the gap by that amount for a stricter separation.
PURGE_GAP = X.shape[1]

n_total = len(X)
n_test = int(np.ceil(n_total * TEST_FRACTION))
n_val = int(np.ceil((n_total - n_test) * VAL_FRACTION))

test_start = n_total - n_test
val_start = test_start - n_val
train_end = val_start - PURGE_GAP
val_end = test_start - PURGE_GAP

if train_end <= 0 or val_end <= val_start:
    raise ValueError(
        f"Not enough samples ({n_total}) for a chronological split with a "
        f"purge gap of {PURGE_GAP}; generate more data or shorten the sequences."
    )

X_train, y_train = X[:train_end], y[:train_end]
X_val, y_val = X[val_start:val_end], y[val_start:val_end]
X_test, y_test = X[test_start:], y[test_start:]

# Percentages are relative to all samples, so they sum to less than 100%
# because the purged samples are not used.
print(f"\nSplit sizes:")
print(f"  Train: {len(X_train):,} ({len(X_train)/len(X)*100:.1f}%)")
print(f"  Val: {len(X_val):,} ({len(X_val)/len(X)*100:.1f}%)")
print(f"  Test: {len(X_test):,} ({len(X_test)/len(X)*100:.1f}%)")

In [None]:
# Normalize features
# The scaler is fed 2-D input, so every (samples, seq_len, features) array is
# flattened to one feature vector per row, scaled, and folded back to 3-D.
n_samples, seq_len, n_features = X_train.shape


def _to_rows(windows):
    """Merge the sample and time axes: one row per time step."""
    return windows.reshape(-1, n_features)


def _to_windows(rows, n_windows):
    """Undo `_to_rows` for an array holding `n_windows` sequences."""
    return rows.reshape(n_windows, seq_len, n_features)


scaler = StandardScaler()

# Statistics are fitted on the training split only and reused for val/test
X_train_scaled = _to_windows(scaler.fit_transform(_to_rows(X_train)), n_samples)
X_val_scaled = _to_windows(scaler.transform(_to_rows(X_val)), len(X_val))
X_test_scaled = _to_windows(scaler.transform(_to_rows(X_test)), len(X_test))

print("✅ Features normalized")

## 3. Create PyTorch Datasets and Loaders

In [None]:
class OrderBookDataset(Dataset):
    """Pairs feature sequences with class labels for use in a DataLoader.

    Both inputs are converted to tensors once, at construction time:
    `X` to a float tensor and `y` to a long (integer) tensor.
    """

    def __init__(self, X, y):
        features, labels = torch.FloatTensor(X), torch.LongTensor(y)
        self.X = features
        self.y = labels

    def __len__(self):
        # Number of samples is the length of the leading axis of X
        n_items = len(self.X)
        return n_items

    def __getitem__(self, idx):
        # One (features, label) pair
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Wrap each scaled split in a dataset
batch_size = 64

train_dataset, val_dataset, test_dataset = (
    OrderBookDataset(split_X, split_y)
    for split_X, split_y in (
        (X_train_scaled, y_train),
        (X_val_scaled, y_val),
        (X_test_scaled, y_test),
    )
)

# Only the training loader shuffles; val/test keep a fixed order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

print(f"✅ Data loaders created (batch_size={batch_size})")

## 4. Build and Train LSTM Model

In [None]:
# Model hyperparameters (also stored in the checkpoint later on)
input_size = n_features   # one input per scaled feature column
hidden_size, num_layers = 128, 2
num_classes = 3
dropout = 0.3

# Instantiate the network, then move it to the selected device
model = OrderBookLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
    dropout=dropout,
)
model = model.to(device)

banner = "=" * 80
print(banner)
print("MODEL ARCHITECTURE")
print(banner)
print(model)
n_params = count_parameters(model)
print(f"\nTotal parameters: {n_params:,}")
print(banner)

In [None]:
# Loss, optimiser and learning-rate schedule
num_epochs = 30

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
# Halve the learning rate once the monitored value has stopped decreasing
# for more than `patience` epochs
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

# Per-epoch metrics, appended to by the training loop
history = {
    metric: []
    for metric in ('train_loss', 'val_loss', 'train_acc', 'val_acc')
}

print("Starting training...")
print(f"Epochs: {num_epochs}, Batch size: {batch_size}\n")

In [None]:
# Training loop
def _run_epoch(loader, training):
    """Run one pass over `loader` and return (mean loss per sample, accuracy).

    With `training=True` the model is put in train mode and the weights are
    updated after every batch (gradients clipped to a max norm of 1.0).
    With `training=False` the model is put in eval mode and no gradients are
    tracked.
    """
    model.train(training)  # train(False) is the same as eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for batch_X, batch_y in loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            if training:
                optimizer.zero_grad()

            # The model returns a tuple; only the logits are needed here.
            logits, _ = model(batch_X)
            loss = criterion(logits, batch_y)

            if training:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size;
            # otherwise a short final batch counts as much as a full one.
            n_batch = batch_y.size(0)
            loss_sum += loss.item() * n_batch
            n_correct += (logits.argmax(dim=1) == batch_y).sum().item()
            n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(train_loader, training=True)
    val_loss, val_acc = _run_epoch(val_loader, training=False)

    # The scheduler lowers the learning rate when validation loss plateaus
    scheduler.step(val_loss)

    # Save history
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_acc'].append(train_acc)
    history['val_acc'].append(val_acc)

    # Print progress every 5 epochs
    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        print()

print("✅ Training complete!")

## 5. Visualize Training History

In [None]:
# Loss (left) and accuracy (right) per epoch, train vs. validation
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (history key suffix, display name) for each panel
panels = [('loss', 'Loss'), ('acc', 'Accuracy')]

for ax, (key, name) in zip(axes, panels):
    ax.plot(history[f'train_{key}'], label=f'Train {name}', linewidth=2)
    ax.plot(history[f'val_{key}'], label=f'Val {name}', linewidth=2)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(name)
    ax.set_title(f'Training and Validation {name}')
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists
history_png = Path('../data/simulations/training_history.png')
history_png.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(history_png, dpi=150, bbox_inches='tight')
plt.show()

print("✅ Training curves saved")

## 6. Evaluate on Test Set

In [None]:
# Test evaluation
CLASS_NAMES = ['Down', 'Flat', 'Up']
CLASS_IDS = list(range(len(CLASS_NAMES)))  # labels are the indices 0, 1, 2

model.eval()
true_batches = []
pred_batches = []
proba_batches = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        # The model returns a tuple; only the logits are used.
        logits, _ = model(batch_X.to(device))

        true_batches.append(batch_y.cpu())
        pred_batches.append(logits.argmax(dim=1).cpu())
        proba_batches.append(torch.softmax(logits, dim=1).cpu())

y_true = torch.cat(true_batches).numpy()
y_pred = torch.cat(pred_batches).numpy()
y_proba = torch.cat(proba_batches).numpy()  # class probabilities, one row per sample

# Classification report
# Passing `labels` keeps all three rows even when a class never occurs in the
# test set or the predictions (otherwise the three target_names would not
# match and the call raises). `zero_division=0` reports 0 for such classes
# instead of emitting warnings.
print("="*80)
print("TEST SET EVALUATION")
print("="*80)
print(classification_report(
    y_true,
    y_pred,
    labels=CLASS_IDS,
    target_names=CLASS_NAMES,
    zero_division=0,
))

# Overall accuracy
test_accuracy = (y_true == y_pred).mean()
print(f"\nOverall Test Accuracy: {test_accuracy:.4f}")
print("="*80)

## 7. Confusion Matrix

In [None]:
# Compute confusion matrix
CLASS_NAMES = ['Down', 'Flat', 'Up']
CLASS_IDS = list(range(len(CLASS_NAMES)))  # labels are the indices 0, 1, 2

# Fixing `labels` guarantees a 3x3 matrix that lines up with the tick labels,
# even if some class is missing from both y_true and y_pred.
cm = confusion_matrix(y_true, y_pred, labels=CLASS_IDS)

# Row-normalise (share of each true class); rows with no true samples stay 0
# instead of turning into NaN from a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals > 0,
)

# Plot: raw counts on the left, normalised shares on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

views = [
    (cm, 'd', 'Confusion Matrix (Counts)'),
    (cm_normalized, '.2f', 'Confusion Matrix (Normalized)'),
]

for ax, (matrix, number_format, title) in zip(axes, views):
    sns.heatmap(matrix, annot=True, fmt=number_format, cmap='Blues',
                xticklabels=CLASS_NAMES,
                yticklabels=CLASS_NAMES,
                ax=ax)
    ax.set_title(title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists
confusion_png = Path('../data/simulations/confusion_matrix.png')
confusion_png.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(confusion_png, dpi=150, bbox_inches='tight')
plt.show()

print("✅ Confusion matrix saved")

## 8. Save Model

In [None]:
# Save model checkpoint
# NOTE: the fitted scaler is stored as a pickled Python object. Loading this
# file with torch.load therefore needs weights_only=False on PyTorch versions
# where weights_only defaults to True - only do that for checkpoints you trust.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scaler': scaler,
    'feature_names': feature_names,
    'config': {
        'input_size': input_size,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'num_classes': num_classes,
        'dropout': dropout
    },
    # Stored as a plain float rather than a NumPy scalar
    'test_accuracy': float(test_accuracy),
    'history': history
}

# torch.save does not create missing folders, so make sure the target exists
checkpoint_path = Path('../models/saved/lstm_orderbook_v1.pth')
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

torch.save(checkpoint, checkpoint_path)
print("✅ Model checkpoint saved to: models/saved/lstm_orderbook_v1.pth")

## 9. Summary and Next Steps

In [None]:
# Final recap of the data, model and results produced by this notebook
banner = "=" * 80

print(banner)
print("MODEL DEVELOPMENT SUMMARY")
print(banner)

print(f"\n📊 Dataset:")
print(f"  Total samples: {len(X):,}")
print(f"  Features: {n_features}")
print(f"  Sequence length: {X.shape[1]}")

print(f"\n🤖 Model:")
# Read the architecture from the actual hyperparameters so this line cannot
# drift out of sync when the model configuration is changed.
print(f"  Architecture: LSTM ({num_layers} layers, {hidden_size} hidden)")
print(f"  Parameters: {count_parameters(model):,}")
print(f"  Device: {device}")

print(f"\n📈 Performance:")
print(f"  Final train accuracy: {history['train_acc'][-1]:.4f}")
print(f"  Final val accuracy: {history['val_acc'][-1]:.4f}")
print(f"  Test accuracy: {test_accuracy:.4f}")

print(f"\n✅ Outputs:")
print(f"  1. Trained model checkpoint")
print(f"  2. Training curves (loss, accuracy)")
print(f"  3. Confusion matrix")
print(f"  4. Classification report")

print(f"\n🚀 Next Steps:")
print(f"  • Implement backtesting with trained model")
print(f"  • Calculate economic PnL and Sharpe ratio")
print(f"  • Deploy model to API for real-time inference")
print(f"  • Experiment with Transformer architecture")

print(banner)