In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load and preprocess data
df = pd.read_csv('time_series_data.csv')

# Define your feature and target columns
features = ['rate_level_1', 'rate_level_2',
            'days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']

target = ['rate_level_1', 'rate_level_2']

# Normalize data
scaler_features = MinMaxScaler()
scaler_target = MinMaxScaler()

# The target columns are also feature columns. Fit the target scaler on the
# RAW values *before* the feature scaling overwrites them in `df`; otherwise
# it would be fitted on data already squeezed into [0, 1], become an identity
# map, and `scaler_target.inverse_transform` could never recover original units.
target_scaled = scaler_target.fit_transform(df[target])
df[features] = scaler_features.fit_transform(df[features])
df[target] = target_scaled

# NOTE(review): both scalers are fitted on the full dataset, so min/max values
# from the later (test) period influence the training inputs. Consider fitting
# them on the training portion only.

# Function to create sequences
def create_sequences(data, target_data, n_timesteps):
    """Build sliding windows and their next-step targets.

    Window ``i`` holds rows ``i .. i + n_timesteps - 1`` of ``data``; its
    target is row ``i + n_timesteps`` of ``target_data``.

    Args:
        data: DataFrame (or array-like) of shape ``(n_rows, n_features)``.
        target_data: DataFrame (or array-like) with at least as many rows as
            ``data``.
        n_timesteps: Window length; must be a positive integer.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_rows - n_timesteps, n_timesteps, n_features)`` and ``y`` has one
        row per window. When there are too few rows for a single window, both
        arrays are empty but keep their trailing dimensions.

    Raises:
        ValueError: If ``n_timesteps`` is smaller than 1.
        IndexError: If ``target_data`` has fewer rows than ``data``.
    """
    if n_timesteps < 1:
        raise ValueError("n_timesteps must be a positive integer")

    # Convert once up front instead of slicing pandas objects on every iteration.
    feature_rows = np.asarray(data)
    target_rows = np.asarray(target_data)

    n_windows = len(feature_rows) - n_timesteps
    if n_windows <= 0:
        # Keep the trailing dimensions so downstream shape lookups still work.
        empty_X = np.empty((0, n_timesteps) + feature_rows.shape[1:], dtype=feature_rows.dtype)
        empty_y = np.empty((0,) + target_rows.shape[1:], dtype=target_rows.dtype)
        return empty_X, empty_y

    if len(target_rows) < len(feature_rows):
        raise IndexError("target_data has fewer rows than data")

    X = np.stack([feature_rows[start:start + n_timesteps] for start in range(n_windows)])
    # The target of window i is the row immediately after the window.
    y = target_rows[n_timesteps:n_timesteps + n_windows].copy()
    return X, y

# Prepare sequences
n_timesteps_input = 12
X, y = create_sequences(df[features], df[target], n_timesteps_input)

# Split into train and test sets.
# Keep chronological order (shuffle=False): consecutive windows share all but
# one time step, so a shuffled split would place near-duplicates of the test
# windows in the training set and make the test loss look better than it is.
# The last 20% of the windows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
import torch

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [1]:
import torch.nn as nn
import torch.optim as optim

class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder regressor for windowed time-series input.

    The encoder consumes a window shaped ``(batch_size, seq_len, input_size)``
    and a linear head maps the encoding of the final time step to
    ``output_size`` values.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the encoder feed-forward sublayer
            (passed as ``dim_feedforward``).
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        output_size: Number of values predicted per window.
        d_model: Optional embedding width used inside the encoder. Defaults to
            ``input_size`` rounded up to the nearest multiple of ``num_heads``.

    Raises:
        ValueError: If an explicit ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, output_size, d_model=None):
        super(TimeSeriesTransformer, self).__init__()
        # Multi-head attention requires the embedding width to be divisible by
        # the head count (e.g. 6 features with 4 heads is rejected), so the raw
        # feature width cannot always be used as d_model directly.
        if d_model is None:
            d_model = -(-input_size // num_heads) * num_heads  # ceil to a multiple of num_heads
        elif d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        # No projection (and no extra parameters) when the feature width already fits.
        self.input_proj = nn.Identity() if d_model == input_size else nn.Linear(input_size, d_model)
        # nn.TransformerEncoder deep-copies this layer; it is kept as an attribute
        # so existing attribute access and state_dict keys stay unchanged.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=hidden_size, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_size)
        # NOTE(review): no positional encoding is added to the input; consider
        # adding one so the encoder can distinguish time-step order.

    def forward(self, src):
        """Return predictions of shape ``(batch_size, output_size)`` for ``src``."""
        src = self.input_proj(src)  # (batch_size, seq_len, input_size) -> (batch_size, seq_len, d_model)
        src = src.permute(1, 0, 2)  # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)
        transformer_output = self.transformer_encoder(src)
        transformer_output = transformer_output.permute(1, 0, 2)  # (seq_len, batch_size, d_model) -> (batch_size, seq_len, d_model)
        output = self.fc(transformer_output[:, -1, :])  # Use the output of the last time step
        return output

# Hyperparameters
# The per-step feature count is read off the last axis of the training tensor;
# the output width equals the number of target columns.
input_size = X_train.shape[2]
output_size = len(target)
hidden_size, num_layers, num_heads = 128, 2, 4
dropout = 0.3
learning_rate, n_epochs = 0.001, 50

# Initialize model, loss function, and optimizer
model = TimeSeriesTransformer(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_heads=num_heads,
    dropout=dropout,
    output_size=output_size,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

ModuleNotFoundError: No module named 'torch'

In [2]:
# Training loop (full batch: one optimizer step per epoch over all training windows)
for epoch in range(n_epochs):
    # Enable training-mode behaviour (dropout) and clear gradients from the previous step
    model.train()
    optimizer.zero_grad()

    # Forward pass and loss over the whole training set
    output = model(X_train)
    loss = criterion(output, y_train)

    # Backpropagate and update the parameters
    loss.backward()
    optimizer.step()

    # Report progress on every tenth epoch (1-based numbering)
    epoch_number = epoch + 1
    if epoch_number % 10 != 0:
        continue
    print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {loss.item():.4f}')

NameError: name 'n_epochs' is not defined

In [None]:
# Example evaluation on test set
# Switch to inference mode and predict without tracking gradients.
model.eval()
with torch.no_grad():
    predictions = model(X_test)

# Undo the target scaling for both the predictions and the ground truth
predictions_scaled_np = predictions.cpu().numpy()
y_test_scaled_np = y_test.cpu().numpy()
predictions_original_scale = scaler_target.inverse_transform(predictions_scaled_np)
y_test_original_scale = scaler_target.inverse_transform(y_test_scaled_np)

# Show predictions next to the actual values
print("Predictions on original scale:", predictions_original_scale)
print("True values on original scale:", y_test_original_scale)

In [None]:
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    """Index-aligned dataset of input windows, targets, and a per-sample temporal value.

    Args:
        X: Indexable collection of inputs (one entry per sample).
        y: Indexable collection of targets, same length as ``X``.
        current_temporal: Indexable collection of per-sample temporal values,
            same length as ``X``.

    Raises:
        ValueError: If the three collections do not have the same length.
    """

    def __init__(self, X, y, current_temporal):
        # Fail at construction time rather than with an IndexError mid-epoch.
        if not (len(X) == len(y) == len(current_temporal)):
            raise ValueError(
                "X, y and current_temporal must have the same length, got "
                f"{len(X)}, {len(y)} and {len(current_temporal)}"
            )
        self.X = X
        self.y = y
        self.current_temporal = current_temporal

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(X, y, current_temporal)`` triple stored at ``idx``."""
        return self.X[idx], self.y[idx], self.current_temporal[idx]

# Create datasets
# NOTE(review): X_train_encoded, X_test_encoded, current_temporal_train and
# current_temporal_test are not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. Presumably they came from an
# autoencoder / temporal-feature experiment whose cells were removed — confirm
# and either restore those cells or delete this one.
train_dataset = TimeSeriesDataset(X_train_encoded, y_train, current_temporal_train)
test_dataset = TimeSeriesDataset(X_test_encoded, y_test, current_temporal_test)

# Create data loaders
# Only the training loader shuffles; the test loader keeps dataset order.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Training loop with batches
# NOTE(review): the model is called with two positional inputs here, but every
# TimeSeriesTransformer.forward visible in this notebook accepts only `src`.
# Presumably this cell belongs to a different model variant — confirm before
# running; with the visible model it raises TypeError.
for epoch in range(n_epochs):
    model.train()
    # Running sum of the per-batch losses for this epoch
    epoch_loss = 0
    for X_batch, y_batch, current_temporal_batch in train_loader:
        optimizer.zero_grad()
        
        # Forward pass (unsqueeze(1) inserts a new axis at position 1 of the temporal batch)
        output = model(X_batch, current_temporal_batch.unsqueeze(1))
        
        # Calculate loss
        loss = criterion(output, y_batch)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()

    # Report the mean per-batch loss every tenth epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

In [None]:
# Example evaluation on test set with batches
# NOTE(review): this cell calls the model with two positional inputs and uses
# `autoencoder`, neither of which matches anything defined in the visible
# notebook (forward takes only `src`; `autoencoder` is never created).
# Presumably it is left over from an autoencoder-based experiment — confirm.
model.eval()
# Running sum of the per-batch test losses
test_loss = 0
predictions = []
with torch.no_grad():
    for X_batch, y_batch, current_temporal_batch in test_loader:
        forecast_encoded = model(X_batch, current_temporal_batch.unsqueeze(1))
        predictions.append(forecast_encoded)
        loss = criterion(forecast_encoded, y_batch)
        test_loss += loss.item()

# Concatenate all predictions
predictions = torch.cat(predictions, dim=0)

# Decode the forecasted encoded features back to the original feature space
with torch.no_grad():
    forecast_decoded = autoencoder.decoder(predictions)

# Inverse-transform the predictions to the original scale
forecast_original_scale = scaler_target.inverse_transform(forecast_decoded.cpu().numpy())
y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())

# Compare predictions to the actual values
print("Predictions on original scale:", forecast_original_scale)
print("True values on original scale:", y_test_original_scale)
# Mean of the per-batch losses
print(f'Test Loss: {test_loss/len(test_loader):.4f}')

In [None]:
import torch.nn as nn
import torch.optim as optim

class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder regressor for windowed time-series input.

    The encoder consumes a window shaped ``(batch_size, seq_len, input_size)``
    and a linear head maps the encoding of the final time step to
    ``output_size`` values.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the encoder feed-forward sublayer
            (passed as ``dim_feedforward``).
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        output_size: Number of values predicted per window.
        d_model: Optional embedding width used inside the encoder. Defaults to
            ``input_size`` rounded up to the nearest multiple of ``num_heads``.

    Raises:
        ValueError: If an explicit ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, output_size, d_model=None):
        super(TimeSeriesTransformer, self).__init__()
        # Multi-head attention requires the embedding width to be divisible by
        # the head count (e.g. 6 features with 4 heads is rejected), so the raw
        # feature width cannot always be used as d_model directly.
        if d_model is None:
            d_model = -(-input_size // num_heads) * num_heads  # ceil to a multiple of num_heads
        elif d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        # No projection (and no extra parameters) when the feature width already fits.
        self.input_proj = nn.Identity() if d_model == input_size else nn.Linear(input_size, d_model)
        # nn.TransformerEncoder deep-copies this layer; it is kept as an attribute
        # so existing attribute access and state_dict keys stay unchanged.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=hidden_size, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_size)
        # NOTE(review): no positional encoding is added to the input; consider
        # adding one so the encoder can distinguish time-step order.

    def forward(self, src):
        """Return predictions of shape ``(batch_size, output_size)`` for ``src``."""
        src = self.input_proj(src)  # (batch_size, seq_len, input_size) -> (batch_size, seq_len, d_model)
        src = src.permute(1, 0, 2)  # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)
        transformer_output = self.transformer_encoder(src)
        transformer_output = transformer_output.permute(1, 0, 2)  # (seq_len, batch_size, d_model) -> (batch_size, seq_len, d_model)
        output = self.fc(transformer_output[:, -1, :])  # Use the output of the last time step
        return output

# Hyperparameters
# Input width follows the last axis of the training tensor; output width
# follows the number of target columns.
input_size = X_train.shape[2]
output_size = len(target)
hidden_size, num_layers, num_heads = 128, 2, 4
dropout = 0.3
learning_rate, n_epochs = 0.001, 50

# Initialize model, loss function, and optimizer
model = TimeSeriesTransformer(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_heads=num_heads,
    dropout=dropout,
    output_size=output_size,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training loop with batches
for epoch in range(n_epochs):
    model.train()
    # Running sum of the per-batch losses for this epoch
    epoch_loss = 0
    for batch in train_loader:
        # Only inputs and targets are used here. Ignoring any trailing fields
        # lets the loop work with loaders that yield (X, y) as well as
        # (X, y, current_temporal).
        X_batch, y_batch, *_ = batch

        optimizer.zero_grad()

        # Forward pass
        output = model(X_batch)

        # Calculate loss
        loss = criterion(output, y_batch)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss every tenth epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

In [None]:
# Example evaluation on test set with batches
model.eval()
# Running sum of the per-batch test losses
test_loss = 0
predictions = []
with torch.no_grad():
    for batch in test_loader:
        # Ignore any trailing fields so both (X, y) and (X, y, current_temporal)
        # loaders are accepted.
        X_batch, y_batch, *_ = batch
        forecast_scaled = model(X_batch)
        predictions.append(forecast_scaled)
        loss = criterion(forecast_scaled, y_batch)
        test_loss += loss.item()

# Concatenate all predictions
predictions = torch.cat(predictions, dim=0)

# The model output is compared directly against the scaled targets above, so
# it is already in scaled target space. The former `autoencoder.decoder` step
# referenced an object that is never defined in this notebook and has been
# removed; only the target scaling needs to be undone.
forecast_original_scale = scaler_target.inverse_transform(predictions.cpu().numpy())
y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())

# Compare predictions to the actual values
print("Predictions on original scale:", forecast_original_scale)
print("True values on original scale:", y_test_original_scale)
# Mean of the per-batch losses
print(f'Test Loss: {test_loss/len(test_loader):.4f}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Define the Transformer model
class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder regressor for windowed time-series input.

    The encoder consumes a window shaped ``(batch_size, seq_len, input_size)``
    and a linear head maps the encoding of the final time step to
    ``output_size`` values.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the encoder feed-forward sublayer
            (passed as ``dim_feedforward``).
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        output_size: Number of values predicted per window.
        d_model: Optional embedding width used inside the encoder. Defaults to
            ``input_size`` rounded up to the nearest multiple of ``num_heads``.

    Raises:
        ValueError: If an explicit ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, output_size, d_model=None):
        super(TimeSeriesTransformer, self).__init__()
        # Multi-head attention requires the embedding width to be divisible by
        # the head count (e.g. 6 features with 4 heads is rejected), so the raw
        # feature width cannot always be used as d_model directly.
        if d_model is None:
            d_model = -(-input_size // num_heads) * num_heads  # ceil to a multiple of num_heads
        elif d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        # No projection (and no extra parameters) when the feature width already fits.
        self.input_proj = nn.Identity() if d_model == input_size else nn.Linear(input_size, d_model)
        # nn.TransformerEncoder deep-copies this layer; it is kept as an attribute
        # so existing attribute access and state_dict keys stay unchanged.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=hidden_size, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_size)
        # NOTE(review): no positional encoding is added to the input; consider
        # adding one so the encoder can distinguish time-step order.

    def forward(self, src):
        """Return predictions of shape ``(batch_size, output_size)`` for ``src``."""
        src = self.input_proj(src)  # (batch_size, seq_len, input_size) -> (batch_size, seq_len, d_model)
        src = src.permute(1, 0, 2)  # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)
        transformer_output = self.transformer_encoder(src)
        transformer_output = transformer_output.permute(1, 0, 2)  # (seq_len, batch_size, d_model) -> (batch_size, seq_len, d_model)
        output = self.fc(transformer_output[:, -1, :])  # Use the output of the last time step
        return output

# Hyperparameters
# Input width follows the last axis of the training tensor; output width
# follows the number of target columns.
input_size = X_train.shape[2]
output_size = len(target)
hidden_size, num_layers, num_heads = 128, 2, 4
dropout = 0.3
learning_rate, n_epochs = 0.001, 50
batch_size = 32

# Initialize model, loss function, and optimizer
model = TimeSeriesTransformer(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_heads=num_heads,
    dropout=dropout,
    output_size=output_size,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create DataLoader
class TimeSeriesDataset(Dataset):
    """Index-aligned dataset of input windows and their targets.

    Args:
        X: Indexable collection of inputs (one entry per sample).
        y: Indexable collection of targets, same length as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail at construction time rather than with an IndexError mid-epoch.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(X, y)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the tensors as datasets, then as mini-batch loaders.
# Only the training loader shuffles; the test loader keeps dataset order.
train_dataset = TimeSeriesDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TimeSeriesDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Training loop
for epoch in range(n_epochs):
    model.train()
    # Running sum of the per-batch losses for this epoch
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        # Clear gradients left over from the previous mini-batch
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch
        output = model(X_batch)
        loss = criterion(output, y_batch)

        # Backpropagate and update the parameters
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss every tenth epoch (1-based numbering)
    if (epoch + 1) % 10 != 0:
        continue
    print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

# Evaluation on test set
model.eval()
# Running sum of the per-batch test losses
test_loss = 0
predictions = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        forecast_scaled = model(X_batch)
        predictions.append(forecast_scaled)
        loss = criterion(forecast_scaled, y_batch)
        test_loss += loss.item()

# Concatenate all predictions
predictions = torch.cat(predictions, dim=0)

# The model output is compared directly against the scaled targets above, so
# it is already in scaled target space. The former `autoencoder.decoder` step
# referenced an object that is never defined in this notebook and has been
# removed; only the target scaling needs to be undone.
forecast_original_scale = scaler_target.inverse_transform(predictions.cpu().numpy())
y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())

# Compare predictions to the actual values
print("Predictions on original scale:", forecast_original_scale)
print("True values on original scale:", y_test_original_scale)
# Mean of the per-batch losses
print(f'Test Loss: {test_loss/len(test_loader):.4f}')

In [None]:
class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder regressor that also exposes last-layer attention.

    The encoder consumes a window shaped ``(batch_size, seq_len, input_size)``
    and a linear head maps the encoding of the final time step to
    ``output_size`` values.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the encoder feed-forward sublayer
            (passed as ``dim_feedforward``).
        num_layers: Number of stacked encoder layers (at least 1).
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        output_size: Number of values predicted per window.
        d_model: Optional embedding width used inside the encoder. Defaults to
            ``input_size`` rounded up to the nearest multiple of ``num_heads``.

    Raises:
        ValueError: If an explicit ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, output_size, d_model=None):
        super(TimeSeriesTransformer, self).__init__()
        # Multi-head attention requires the embedding width to be divisible by
        # the head count (e.g. 6 features with 4 heads is rejected), so the raw
        # feature width cannot always be used as d_model directly.
        if d_model is None:
            d_model = -(-input_size // num_heads) * num_heads  # ceil to a multiple of num_heads
        elif d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        # No projection (and no extra parameters) when the feature width already fits.
        self.input_proj = nn.Identity() if d_model == input_size else nn.Linear(input_size, d_model)
        # nn.TransformerEncoder deep-copies this layer, so the layers that actually
        # run live in `self.transformer_encoder.layers`, not in this template.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=hidden_size, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_size)

    def forward(self, src):
        """Run the encoder and return predictions plus attention weights.

        Returns:
            Tuple ``(output, attn_weights)``. ``output`` has shape
            ``(batch_size, output_size)``. ``attn_weights`` is the head-averaged
            self-attention map of the last encoder layer, detached from the
            graph, with shape ``(batch_size, seq_len, seq_len)``. In training
            mode the map includes attention dropout.
        """
        src = self.input_proj(src)  # (batch_size, seq_len, input_size) -> (batch_size, seq_len, d_model)
        hidden = src.permute(1, 0, 2)  # (batch_size, seq_len, d_model) -> (seq_len, batch_size, d_model)

        # nn.MultiheadAttention does not store its weights, and the encoder
        # layers call it without requesting them. Step through the stack
        # manually and query the last layer's attention on that layer's own input.
        encoder_layers = list(self.transformer_encoder.layers)
        for layer in encoder_layers[:-1]:
            hidden = layer(hidden)
        last_layer = encoder_layers[-1]
        _, attn_weights = last_layer.self_attn(hidden, hidden, hidden, need_weights=True)
        hidden = last_layer(hidden)
        if self.transformer_encoder.norm is not None:
            hidden = self.transformer_encoder.norm(hidden)

        hidden = hidden.permute(1, 0, 2)  # (seq_len, batch_size, d_model) -> (batch_size, seq_len, d_model)
        output = self.fc(hidden[:, -1, :])  # Use the output of the last time step
        return output, attn_weights.detach()  # Return attention weights

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Example evaluation on test set with batches
# A `model` instance created before TimeSeriesTransformer was redefined still
# runs the old forward, which returns predictions only. Rebuild it from the
# current class and carry over the trained weights so that forward yields
# (predictions, attention weights).
if not isinstance(model, TimeSeriesTransformer):
    trained_state = model.state_dict()
    model = TimeSeriesTransformer(input_size, hidden_size, num_layers, num_heads, dropout, output_size)
    model.load_state_dict(trained_state)

model.eval()
# Running sum of the per-batch test losses
test_loss = 0
predictions = []
attention_weights = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        forecast_scaled, attn_weights = model(X_batch)
        predictions.append(forecast_scaled)
        attention_weights.append(attn_weights)
        loss = criterion(forecast_scaled, y_batch)
        test_loss += loss.item()

# Concatenate all predictions and attention weights along the sample axis
predictions = torch.cat(predictions, dim=0)
attention_weights = torch.cat(attention_weights, dim=0)

# The model output is compared directly against the scaled targets above, so
# it is already in scaled target space. The former `autoencoder.decoder` step
# referenced an object that is never defined in this notebook and has been
# removed; only the target scaling needs to be undone.
forecast_original_scale = scaler_target.inverse_transform(predictions.cpu().numpy())
y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())

# Compare predictions to the actual values
print("Predictions on original scale:", forecast_original_scale)
print("True values on original scale:", y_test_original_scale)
# Mean of the per-batch losses
print(f'Test Loss: {test_loss/len(test_loader):.4f}')

# Visualize attention weights for a single sample
def plot_attention_weights(attn_weights, input_sequence):
    """Draw a heatmap of one 2-D attention map.

    Args:
        attn_weights: 2-D tensor of attention weights; rows are drawn on the
            y axis and columns on the x axis.
        input_sequence: The input window the map belongs to. Currently unused;
            kept so existing calls keep working.
    """
    # Use an explicit figure/axes pair so the heatmap never lands on a
    # leftover figure from an earlier cell.
    fig, ax = plt.subplots()
    # detach() allows tensors that still track gradients to be plotted.
    sns.heatmap(attn_weights.detach().cpu().numpy(), cmap='viridis', ax=ax)
    # The colour encodes the weight; the axes index sequence positions.
    # NOTE(review): assumes rows are query positions and columns are key
    # positions, as returned by nn.MultiheadAttention — confirm for other sources.
    ax.set_xlabel('Key position (input time step)')
    ax.set_ylabel('Query position (time step)')
    ax.set_title('Attention Weights Heatmap')
    plt.show()

# Plot attention weights for the first test sample
plot_attention_weights(attention_weights[0], X_test[0])