In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import random
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error

torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

In [2]:
df = pd.read_csv('monthly_FRED-MD_2024-12_processed.csv')

# Deep Learning

In [3]:
NUMBER_OF_LAGS = 20
TRAIN_RATIO = 0.8
HIDDEN_DIM = 16
NUM_LAYERS = 1
DROPOUT = 0.0
LR = 3e-4
BATCH_SIZE = 64
EPOCHS = 50
CLASS_THRESHOLD = 0.5

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cpu


In [5]:
# Keep only numeric features (drop target)
numeric = df.select_dtypes(include=[np.number]).copy()
X_all = numeric.drop(columns=["sign", "volatility"]).values
y_vola = numeric["volatility"].astype(int).values

In [6]:
# Create sliding-window sequences
def create_sequences(X, y_vola, window):
    Xs, yv = [], []
    for i in range(window, len(X)):
        Xs.append(X[i - window : i, :])
        yv.append(y_vola[i])
    return np.array(Xs), np.array(yv)

X_seq, y_seq = create_sequences(X_all, y_vola, NUMBER_OF_LAGS)

In [7]:
train_size = int(len(X_seq) * TRAIN_RATIO)
X_train = X_seq[:train_size]; X_test = X_seq[train_size:]
yv_train = y_seq[:train_size]; yv_test = y_seq[train_size:]

In [8]:
# Torch datasets
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
yv_train_t = torch.tensor(yv_train, dtype=torch.float32).unsqueeze(1).to(device)
X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)

train_dataset = TensorDataset(X_train_t, yv_train_t)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Define LSTM Model

In [9]:
class SimpleLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=16, num_layers=1, dropout=0.0):
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, 1)
    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc(out)

input_dim = X_train.shape[2]
model = SimpleLSTM(input_dim=input_dim, hidden_dim=HIDDEN_DIM, num_layers=NUM_LAYERS, dropout=DROPOUT)
optimizer = optim.Adam(model.parameters(), lr=LR)
criterion = nn.MSELoss()

In [10]:
model

SimpleLSTM(
  (lstm): LSTM(113, 16, batch_first=True)
  (fc): Linear(in_features=16, out_features=1, bias=True)
)

## Model Training

X_train_t contains batch size, sequence length (number of lags) and number of variables.
Pytorch automatically calls forward()-method when input is given to the model.

In [11]:
model.train()
for epoch in range(EPOCHS+1):     # number of seeing the training data for learning parameters
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: loss = {loss.item():.4f}")

Epoch 0: loss = 2534.4175
Epoch 10: loss = 2488.8174
Epoch 20: loss = 2437.8862
Epoch 30: loss = 2380.7739
Epoch 40: loss = 2312.1919
Epoch 50: loss = 2244.0337


### Out of sample Prediction

In [12]:
model.eval()
with torch.no_grad():
    vol_preds = model(X_test_t).squeeze().numpy()

In [13]:
# regression metrics for volatility
mse_vol = mean_squared_error(yv_test, vol_preds)
rmse_vol = root_mean_squared_error(yv_test, vol_preds)
mae = mean_absolute_error(yv_test, vol_preds)
print(f"Volatility MSE (original units): {mse_vol:.6f}")
print(f"Volatility RMSE (original units): {rmse_vol:.6f}")
print(f"Volatility MAE (original units): {mae:.6f}")

Volatility MSE (original units): 1013.103882
Volatility RMSE (original units): 31.829292
Volatility MAE (original units): 30.366154


In [14]:
def count_parameters(m):
    total = sum(p.numel() for p in m.parameters())
    trainable = sum(p.numel() for p in m.parameters() if p.requires_grad)
    print(f"Total params: {total:,} | Trainable params: {trainable:,}")

count_parameters(model)

print(f"Data points: {df.shape[0]*df.shape[1]}")

Total params: 8,401 | Trainable params: 8,401
Data points: 90735
