# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset

# Define Attention Mechanism
class Attention(nn.Module):
    """Additive attention that scores each encoder time step against a query state.

    The query is paired with every encoder output, the pair is projected with a
    linear layer followed by ``tanh``, and the result is reduced to one scalar
    score per time step by a dot product with the learned vector ``v``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # Maps the concatenated [query; encoder output] pair down to hidden_size.
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        # Scoring vector; its values are overwritten by init_weights() below.
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise the projection, zero its bias, and draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights over the time axis.

        Args:
            hidden: query state of shape (batch, hidden_size).
            encoder_outputs: tensor of shape (batch, seq_len, hidden_size).

        Returns:
            Tensor of shape (batch, seq_len) whose rows sum to 1.
        """
        batch_size = encoder_outputs.size(0)
        n_steps = encoder_outputs.size(1)

        # Copy the query along the time axis so it can be paired with every step.
        tiled_query = hidden.unsqueeze(1).repeat(1, n_steps, 1)
        paired = torch.cat((tiled_query, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired))

        # (batch, 1, hidden) x (batch, hidden, seq_len) -> (batch, 1, seq_len)
        scorer = self.v.repeat(batch_size, 1).unsqueeze(1)
        scores = torch.bmm(scorer, energy.transpose(1, 2)).squeeze(1)
        return torch.softmax(scores, dim=1)

# Define LSTM Encoder
class LSTMEncoder(nn.Module):
    """Batch-first LSTM that returns its outputs together with the final states."""

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
        )

    def forward(self, x):
        """Run the LSTM over ``x`` of shape (batch, seq_len, input_size).

        Returns:
            A tuple ``(outputs, hidden, cell)``: the per-step outputs and the
            final hidden and cell states as produced by ``nn.LSTM``.
        """
        outputs, final_state = self.lstm(x)
        hidden, cell = final_state
        return outputs, hidden, cell

# Define LSTM Decoder with Attention
class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder whose only input is an attention context vector.

    The top-layer hidden state is used as the attention query over the encoder
    outputs; the resulting context vector is fed through the LSTM for one step
    and projected to ``output_size``.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout):
        super(LSTMDecoderWithAttention, self).__init__()
        self.attention = Attention(hidden_size)
        # The LSTM receives only the attention context, which is hidden_size
        # wide. Declaring the input as hidden_size * 2 made every forward pass
        # fail with an input-size mismatch.
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, hidden, cell):
        """Decode one step.

        Args:
            encoder_outputs: tensor of shape (batch, seq_len, hidden_size).
            hidden: initial LSTM hidden state, (num_layers, batch, hidden_size).
            cell: initial LSTM cell state, same shape as ``hidden``.

        Returns:
            A tuple ``(prediction, hidden, cell)`` where ``prediction`` has
            shape (batch, output_size) and the states are the updated LSTM states.
        """
        # Query the encoder outputs with the top layer's hidden state.
        attn_weights = self.attention(hidden[-1], encoder_outputs)
        # (batch, 1, seq_len) x (batch, seq_len, hidden) -> (batch, 1, hidden):
        # the result already carries the length-1 sequence axis the LSTM expects.
        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
        outputs, (hidden, cell) = self.lstm(context, (hidden, cell))
        prediction = self.fc(outputs.squeeze(1))
        return prediction, hidden, cell

# Define Transformer Encoder
class TransformerEncoder(nn.Module):
    """Linear embedding + learned positional encoding + Transformer encoder stack.

    Inputs and outputs are batch-first: (batch, seq_len, features).

    Args:
        input_size: width of the incoming features.
        hidden_size: model width used by the embedding and the encoder layers.
        num_layers: number of stacked encoder layers.
        num_heads: attention heads per layer.
        dropout: dropout probability inside the encoder layers.
        max_len: longest sequence the learned positional encoding can cover.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, max_len=1000):
        super(TransformerEncoder, self).__init__()
        self.max_len = max_len
        self.embedding = nn.Linear(input_size, hidden_size)
        # Learned positional encoding, initialised to zeros.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, hidden_size))
        # batch_first=True is required because forward() receives
        # (batch, seq_len, hidden). With the default (False) the layer treats
        # axis 0 as time and attends across samples in the batch.
        self.encoder_layers = nn.TransformerEncoderLayer(
            hidden_size, num_heads, hidden_size * 4, dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layers, num_layers)

    def forward(self, x):
        """Encode ``x`` of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, seq_len, hidden_size).

        Raises:
            ValueError: if ``seq_len`` exceeds ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding size {self.max_len}"
            )
        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        return self.transformer_encoder(x)

# Define Convolutional Layers
class ConvLayers(nn.Module):
    """Two stacked 1-D convolutions over the time axis, each followed by ReLU and dropout."""

    def __init__(self, input_size, hidden_size, kernel_size, dropout):
        super().__init__()
        # Symmetric padding; keeps the sequence length unchanged for odd kernel sizes.
        pad = (kernel_size - 1) // 2
        self.conv1 = nn.Conv1d(input_size, hidden_size, kernel_size, padding=pad)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size, padding=pad)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply both conv stages to ``x`` of shape (batch, seq_len, input_size).

        Returns:
            Tensor laid out as (batch, seq_len, hidden_size).
        """
        # Conv1d expects channels before time: (batch, channels, seq_len).
        channels_first = x.permute(0, 2, 1)
        for conv in (self.conv1, self.conv2):
            channels_first = self.dropout(self.relu(conv(channels_first)))
        # Restore the (batch, seq_len, channels) layout.
        return channels_first.permute(0, 2, 1)

# Define Hybrid Model with Models in Series
class HybridForecastingModel(nn.Module):
    """LSTM encoder -> Transformer encoder -> Conv layers -> attention LSTM decoder, in series.

    The decoder prediction is concatenated with the current context features
    and mapped to the final output by a linear layer.

    Raises:
        ValueError: if ``lstm_hidden_size`` differs from ``conv_hidden_size``.
    """

    def __init__(self, input_size, lstm_hidden_size, transformer_hidden_size, conv_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout):
        super(HybridForecastingModel, self).__init__()
        # forward() hands the encoder LSTM's (hidden, cell) state straight to a
        # decoder that is sized by conv_hidden_size, so the two widths must be
        # equal. Fail here with a clear message instead of deep inside forward().
        if lstm_hidden_size != conv_hidden_size:
            raise ValueError(
                "lstm_hidden_size must equal conv_hidden_size because the encoder "
                "LSTM state initialises the decoder LSTM "
                f"(got {lstm_hidden_size} and {conv_hidden_size})"
            )
        self.lstm_encoder = LSTMEncoder(input_size, lstm_hidden_size, lstm_num_layers, dropout)
        self.transformer_encoder = TransformerEncoder(lstm_hidden_size, transformer_hidden_size, transformer_num_layers, transformer_num_heads, dropout)
        self.conv_layers = ConvLayers(transformer_hidden_size, conv_hidden_size, conv_kernel_size, dropout)
        self.lstm_decoder = LSTMDecoderWithAttention(conv_hidden_size, output_size, lstm_num_layers, dropout)
        self.fc = nn.Linear(output_size + context_size, output_size)

    def forward(self, past_data, current_context):
        """Forecast from a window of past data and the current context.

        Args:
            past_data: tensor of shape (batch, seq_len, input_size).
            current_context: tensor of shape (batch, context_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        encoder_outputs, hidden, cell = self.lstm_encoder(past_data)
        transformer_outputs = self.transformer_encoder(encoder_outputs)
        conv_outputs = self.conv_layers(transformer_outputs)
        # The decoder attends over the conv features, starting from the encoder state.
        lstm_output, _, _ = self.lstm_decoder(conv_outputs, hidden, cell)
        combined_features = torch.cat((lstm_output, current_context), dim=1)
        return self.fc(combined_features)

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    The encoder ends in ReLU, so codes are non-negative; the decoder ends in
    Sigmoid, so reconstructions lie in [0, 1].
    """

    def __init__(self, input_dim, encoding_dim, hidden_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, encoding_dim),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every linear layer and zero its bias (encoder first, then decoder)."""
        for layer in (*self.encoder, *self.decoder):
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return ``(encoded, decoded)`` for input ``x``."""
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return code, reconstruction

def load_and_preprocess_data(file_path, features, target, context_features):
    """Load a CSV file, fill missing values, and min-max scale the column groups.

    Args:
        file_path: path of the CSV file to read.
        features: list of feature column names.
        target: list of target column names.
        context_features: list of context column names.

    Returns:
        A tuple ``(df, scaler_target, scaler_context)``: the frame with the
        three column groups scaled, plus the fitted target and context scalers.
    """
    df = pd.read_csv(file_path)

    # Forward-fill gaps, then back-fill whatever is still missing at the start.
    # (fillna(method=...) is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()
    scaler_context = MinMaxScaler()

    # Fit every scaler on the RAW values before writing anything back into df.
    # The column groups may overlap (a target can also be a feature). Scaling
    # in place group by group would fit a later scaler on already-scaled
    # columns, turning its inverse_transform into an identity mapping that
    # never returns the original units.
    scaled_features = scaler_features.fit_transform(df[features])
    scaled_target = scaler_target.fit_transform(df[target])
    scaled_context = scaler_context.fit_transform(df[context_features])

    df[features] = scaled_features
    df[target] = scaled_target
    df[context_features] = scaled_context

    # NOTE(review): the scalers are fitted on the whole file, i.e. before any
    # train/validation split performed by the caller.
    return df, scaler_target, scaler_context

def create_sequences(data, target_data, context_data, n_timesteps):
    """Build sliding windows and the target/context rows that follow each window.

    Window ``i`` covers rows ``i .. i + n_timesteps - 1`` of ``data``; its target
    and context are row ``i + n_timesteps`` of ``target_data`` / ``context_data``.

    Args:
        data: feature table (DataFrame or array-like), one row per time step.
        target_data: target table with at least as many rows as ``data``.
        context_data: context table with at least as many rows as ``data``.
        n_timesteps: window length; must be at least 1.

    Returns:
        A tuple of NumPy arrays ``(X, y, context)``. For 2-D inputs the shapes
        are (n, n_timesteps, n_features), (n, n_targets) and (n, n_context),
        with ``n = len(data) - n_timesteps``. When ``data`` is too short for a
        single window, the arrays have zero rows but keep the trailing axes.

    Raises:
        ValueError: if ``n_timesteps < 1`` or the target/context tables have
            fewer rows than ``data``.
    """
    if n_timesteps < 1:
        raise ValueError(f"n_timesteps must be at least 1, got {n_timesteps}")

    # Convert once instead of slicing the pandas objects on every iteration.
    feature_values = np.asarray(data)
    target_values = np.asarray(target_data)
    context_values = np.asarray(context_data)

    n_rows = len(feature_values)
    if len(target_values) < n_rows or len(context_values) < n_rows:
        raise ValueError("target_data and context_data must have at least as many rows as data")

    n_samples = n_rows - n_timesteps
    if n_samples <= 0:
        # Keep the trailing axes so callers always receive consistent ranks.
        return (
            np.empty((0, n_timesteps) + feature_values.shape[1:], dtype=feature_values.dtype),
            np.empty((0,) + target_values.shape[1:], dtype=target_values.dtype),
            np.empty((0,) + context_values.shape[1:], dtype=context_values.dtype),
        )

    X = np.stack([feature_values[start:start + n_timesteps] for start in range(n_samples)])
    # np.array(...) copies, so the results never alias the caller's data.
    y = np.array(target_values[n_timesteps:n_rows])
    context_features = np.array(context_values[n_timesteps:n_rows])
    return X, y, context_features

def prepare_data(df, features, target, context_features, n_timesteps):
    """Window the selected columns of ``df`` and return them as float32 tensors.

    Returns:
        A tuple ``(X, y, context)`` of ``torch.float32`` tensors built from the
        arrays produced by ``create_sequences``.
    """
    arrays = create_sequences(
        df[features],
        df[target],
        df[context_features],
        n_timesteps,
    )
    # One conversion rule for all three arrays: copy into a float32 tensor.
    X, y, context = (torch.tensor(array, dtype=torch.float32) for array in arrays)
    return X, y, context

def train_autoencoder(autoencoder, train_loader, n_epochs, ae_optimizer, ae_criterion):
    """Train ``autoencoder`` to reconstruct the first element of every batch.

    Gradients are clipped to a global norm of 1.0 before each optimizer step.
    The mean batch loss is printed every tenth epoch.
    """
    for epoch_number in range(1, n_epochs + 1):
        autoencoder.train()
        running_loss = 0

        for batch in train_loader:
            inputs = batch[0]
            ae_optimizer.zero_grad()
            _, reconstruction = autoencoder(inputs)
            batch_loss = ae_criterion(reconstruction, inputs)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), max_norm=1.0)
            ae_optimizer.step()
            running_loss += batch_loss.item()

        # Report only every tenth epoch to keep the log short.
        if epoch_number % 10 == 0:
            mean_loss = running_loss / len(train_loader)
            print(f'Autoencoder Epoch [{epoch_number}/{n_epochs}], Loss: {mean_loss:.4f}')

def encode_data(autoencoder, data):
    """Encode ``data`` with the autoencoder's encoder, without tracking gradients.

    Args:
        autoencoder: module exposing an ``encoder`` sub-module; it is switched
            to evaluation mode as a side effect.
        data: input tensor accepted by ``autoencoder.encoder``.

    Returns:
        The encoder output for ``data``.
    """
    autoencoder.eval()
    with torch.no_grad():
        # The encoder returns a single tensor. Unpacking it as a 2-tuple (as
        # the full autoencoder's output would be) raised for every input whose
        # first axis was not exactly 2, and silently returned only the first
        # row when it was.
        encoded_data = autoencoder.encoder(data)
    return encoded_data

def train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10):
    """Train ``model`` with per-epoch validation and patience-based early stopping.

    Each batch is a ``(past_data, target, context_data)`` triple. Gradients are
    clipped to a global norm of 1.0. After every epoch the mean training and
    validation losses are printed. Once the validation loss has failed to
    improve for ``patience`` consecutive epochs, training stops at the start of
    the following epoch, where "Early stopping" is printed.
    """
    lowest_val_loss = float('inf')
    stale_epochs = 0
    stop_requested = False

    for epoch_idx in range(n_epochs):
        # The stop decision taken at the end of the previous epoch is acted on here.
        if stop_requested:
            print("Early stopping")
            break

        # ---- training pass ----
        model.train()
        running_loss = 0
        for past_data, y_batch, context_data in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(past_data, context_data), y_batch)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += batch_loss.item()

        if scheduler:
            scheduler.step()

        # ---- validation pass ----
        model.eval()
        with torch.no_grad():
            val_loss = sum(
                criterion(model(past_data, context_data), y_batch).item()
                for past_data, y_batch, context_data in val_loader
            )
        val_loss /= len(val_loader)

        train_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch_idx + 1}/{n_epochs}], Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        # ---- early-stopping bookkeeping ----
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            stale_epochs = 0
            continue
        stale_epochs += 1
        stop_requested = stale_epochs >= patience

def evaluate_hybrid(model, test_loader, scaler_target):
    """Run ``model`` over ``test_loader`` and return forecasts on the original scale.

    Each batch is a ``(past_data, target, context_data)`` triple; the target is
    ignored. Per-batch forecasts are concatenated along the first axis and
    passed through ``scaler_target.inverse_transform``.
    """
    model.eval()
    with torch.no_grad():
        batch_forecasts = [
            model(past_data, context_data).cpu().numpy()
            for past_data, _, context_data in test_loader
        ]

    # Stitch the per-batch arrays together, then undo the target scaling.
    stacked_forecasts = np.concatenate(batch_forecasts, axis=0)
    return scaler_target.inverse_transform(stacked_forecasts)

def _encode_sequences(autoencoder, sequences):
    """Encode every time step of (n_samples, n_timesteps, n_features) windows."""
    n_samples, n_steps, n_features = sequences.shape
    # The autoencoder works on single time steps, so flatten the window axis,
    # encode, and restore the (n_samples, n_timesteps, encoding) layout.
    flat_steps = sequences.reshape(-1, n_features)
    return encode_data(autoencoder, flat_steps).view(n_samples, n_steps, -1)


def main(file_path, features, target, context_features, n_timesteps, encoding_dim, hidden_dim, lstm_hidden_size, transformer_hidden_size, conv_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout, learning_rate, n_epochs, ae_n_epochs, batch_size, n_splits=5):
    """Run the full pipeline: load data, cross-validate, and print final forecasts.

    For every K-fold split an autoencoder is trained on the training windows,
    both splits are encoded with it, and a ``HybridForecastingModel`` is trained
    on the encoded windows with early stopping. After the last fold, that
    fold's autoencoder and model are applied to the complete dataset and the
    forecasts are printed next to the true values on the original scale.

    Returns:
        None. Results are printed.
    """
    # Load and preprocess data (the context scaler is not needed here).
    df, scaler_target, _scaler_context = load_and_preprocess_data(file_path, features, target, context_features)

    # Prepare data
    X, y, context = prepare_data(df, features, target, context_features, n_timesteps)

    # NOTE(review): the windows overlap in time and the folds are shuffled, so
    # validation windows share time steps with training windows. Validation
    # losses are therefore optimistic for a forecasting task.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    for fold, (train_index, val_index) in enumerate(kf.split(X)):
        print(f"Fold {fold+1}/{n_splits}")

        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        context_train, context_val = context[train_index], context[val_index]

        # Train a fresh autoencoder on this fold's training windows.
        ae_loader = DataLoader(TensorDataset(X_train), batch_size=batch_size, shuffle=True)
        input_dim = X_train.size(2)
        autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dim)
        ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
        ae_criterion = nn.MSELoss()
        train_autoencoder(autoencoder, ae_loader, ae_n_epochs, ae_optimizer, ae_criterion)

        # Encode the training and validation windows step by step.
        encoded_train = _encode_sequences(autoencoder, X_train)
        encoded_val = _encode_sequences(autoencoder, X_val)

        # Create DataLoaders for hybrid model training
        train_dataset = TensorDataset(encoded_train, y_train, context_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_dataset = TensorDataset(encoded_val, y_val, context_val)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Initialize model; its input width is the encoded feature size.
        input_size = encoded_train.size(2)
        model = HybridForecastingModel(input_size, lstm_hidden_size, transformer_hidden_size, conv_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.MSELoss()
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

        # Train the hybrid model with early stopping
        train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler)

    # Final evaluation with the last fold's autoencoder and model. The model was
    # trained on ENCODED windows, so the raw windows must be encoded first;
    # feeding raw X fails with a feature-size mismatch.
    # NOTE(review): this "test set" is the complete dataset, including samples
    # the last fold trained on. It is not a held-out estimate.
    encoded_all = _encode_sequences(autoencoder, X)
    test_dataset = TensorDataset(encoded_all, y, context)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    forecast_original_scale = evaluate_hybrid(model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_original_scale = scaler_target.inverse_transform(y.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_original_scale)

# --- Data source and column roles ---
file_path = 'time_series_data.csv'
features = [
    'rate_level_1',
    'rate_level_2',
    'days_to_end_of_month',
    'days_to_ECB_meeting',
    'days_to_Fed_meeting',
    'ois_sofr_rate',
]
target = ['rate_level_1', 'rate_level_2']
context_features = ['stock_price', 'fx_rate', 'commodity_price']

# --- Windowing and autoencoder sizes ---
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# --- Hybrid model sizes ---
lstm_hidden_size = 128
transformer_hidden_size = 128
conv_hidden_size = 128
context_size = len(context_features)  # One input per context column
output_size = 2  # One output per target column
lstm_num_layers = 2
transformer_num_layers = 2
transformer_num_heads = 4
conv_kernel_size = 3
dropout = 0.3

# --- Optimisation settings ---
learning_rate = 0.001
n_epochs = 50
ae_n_epochs = 50
batch_size = 64

# Run the pipeline; keyword arguments keep the long argument list unambiguous.
main(
    file_path=file_path,
    features=features,
    target=target,
    context_features=context_features,
    n_timesteps=n_timesteps,
    encoding_dim=encoding_dim,
    hidden_dim=hidden_dim,
    lstm_hidden_size=lstm_hidden_size,
    transformer_hidden_size=transformer_hidden_size,
    conv_hidden_size=conv_hidden_size,
    context_size=context_size,
    output_size=output_size,
    lstm_num_layers=lstm_num_layers,
    transformer_num_layers=transformer_num_layers,
    transformer_num_heads=transformer_num_heads,
    conv_kernel_size=conv_kernel_size,
    dropout=dropout,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    ae_n_epochs=ae_n_epochs,
    batch_size=batch_size,
)