In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset

# Define Attention Mechanism
class Attention(nn.Module):
    """Additive attention scoring every encoder step against one decoder state.

    Each step is scored as ``v . tanh(W [hidden; encoder_output])`` and the
    scores are normalised with a softmax over the sequence axis.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # Projects the concatenated (hidden, encoder_output) pair to hidden_size.
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        # Scoring vector; this random draw is overwritten by init_weights().
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-init the projection, zero its bias, draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, seq_len).

        The indexing below treats ``hidden`` as (batch, hidden_size) and
        ``encoder_outputs`` as (batch, seq_len, hidden_size).
        """
        batch_size, n_steps = encoder_outputs.size(0), encoder_outputs.size(1)
        # Broadcast the single decoder state across all encoder steps.
        tiled_hidden = hidden.unsqueeze(1).expand(-1, n_steps, -1)
        paired = torch.cat((tiled_hidden, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired)).transpose(1, 2)
        # One copy of v per batch element so bmm yields one score per step.
        scorer = self.v.view(1, 1, -1).expand(batch_size, 1, -1)
        scores = torch.bmm(scorer, energy).squeeze(1)
        return torch.softmax(scores, dim=1)

# Define Encoder
class LSTMEncoder(nn.Module):
    """Stacked LSTM that encodes an input sequence.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM applies dropout only between stacked layers. With a single
        # layer a non-zero value has no effect and only emits a UserWarning,
        # so pass 0 in that case.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=effective_dropout,
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise all LSTM weight matrices and zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

    def forward(self, x):
        """Run the LSTM over ``x`` (batch-first).

        Returns:
            Tuple ``(outputs, hidden, cell)``: the per-step outputs and the
            final hidden and cell states as returned by ``nn.LSTM``.
        """
        outputs, (hidden, cell) = self.lstm(x)
        return outputs, hidden, cell

# Define Decoder with Attention
class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder fed with an attention context and temporal features.

    Args:
        hidden_size: Hidden size shared with the encoder.
        output_size: Number of predicted target values.
        num_layers: Number of stacked LSTM layers (must match the encoder,
            because the encoder's final states initialise this LSTM).
        dropout: Dropout probability applied between stacked layers.
        n_temporal_features: Width of ``current_temporal_features`` appended to
            the context vector. Defaults to 4, the value that used to be
            hard-coded.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, n_temporal_features=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.n_temporal_features = n_temporal_features
        self.attention = Attention(hidden_size)
        # Dropout between layers is meaningless for one layer and only
        # triggers a UserWarning from nn.LSTM.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            hidden_size + n_temporal_features,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=effective_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise LSTM and output weights; zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, hidden, cell, encoder_outputs, current_temporal_features):
        """Produce one prediction step.

        Args:
            hidden: Encoder final hidden states; the last layer's state drives
                the attention query.
            cell: Encoder final cell states.
            encoder_outputs: Per-step encoder outputs (batch-first).
            current_temporal_features: Tensor concatenated to the context on
                dim 2, so it must be (batch, 1, n_temporal_features).

        Returns:
            Tuple ``(prediction, hidden, cell, attention_weights)`` where the
            attention weights are (batch, seq_len).
        """
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        attention_weights = attention_weights.unsqueeze(1)
        # Weighted sum of encoder outputs: (batch, 1, hidden_size).
        context = torch.bmm(attention_weights, encoder_outputs)
        decoder_input = torch.cat([context, current_temporal_features], dim=2)
        output, (hidden, cell) = self.lstm(decoder_input, (hidden, cell))
        prediction = self.fc(output).squeeze(1)
        return prediction, hidden, cell, attention_weights.squeeze(1)

# Define full Encoder-Decoder Model
class LSTMEncoderDecoderWithAttention(nn.Module):
    """Encoder-decoder pair producing a one-step forecast plus attention weights."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(input_size, hidden_size, num_layers, dropout)
        self.decoder = LSTMDecoderWithAttention(hidden_size, output_size, num_layers, dropout)

    def forward(self, encoder_input, current_temporal_features):
        """Encode the input sequence, decode one step, and return
        ``(prediction, attention_weights)``; the decoder's updated states are
        discarded.
        """
        encoder_outputs, hidden, cell = self.encoder(encoder_input)
        decoded = self.decoder(hidden, cell, encoder_outputs, current_temporal_features)
        prediction, attention_weights = decoded[0], decoded[3]
        return prediction, attention_weights

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Two-layer fully connected autoencoder.

    The encoder ends in a ReLU (non-negative codes) and the decoder ends in a
    Sigmoid (reconstructions in [0, 1]).
    """

    def __init__(self, input_dim, encoding_dim, hidden_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, encoding_dim),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every Linear weight and zero every Linear bias."""
        linear_layers = [
            module
            for module in (*self.encoder, *self.decoder)
            if isinstance(module, nn.Linear)
        ]
        for linear in linear_layers:
            nn.init.xavier_uniform_(linear.weight)
            nn.init.constant_(linear.bias, 0)

    def forward(self, x):
        """Return ``(encoded, decoded)`` for the input batch ``x``."""
        latent = self.encoder(x)
        return latent, self.decoder(latent)

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV, fill gaps, and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Unused here; kept for interface
            compatibility with callers.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler, for inverse-transforming predictions.
    """
    df = pd.read_csv(file_path)

    # Forward-fill then back-fill gaps. DataFrame.ffill()/bfill() replace the
    # deprecated fillna(method=...) spelling.
    if df.isnull().to_numpy().any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit the target scaler on the RAW target values before the feature columns
    # are overwritten. Target columns may also be listed in `features`; fitting
    # after they were normalised would make this scaler an identity map and
    # inverse_transform() would never restore the original scale.
    scaled_target = scaler_target.fit_transform(df[target])

    # Normalize feature columns
    df[features] = scaler_features.fit_transform(df[features])

    # Normalize target columns (min-max scaling is per column, so columns shared
    # with `features` receive the same values either way).
    df[target] = scaled_target

    return df, scaler_target

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Build sliding-window samples for one-step-ahead forecasting.

    For every window start ``i`` the sample is rows ``i .. i+n_timesteps-1`` of
    ``data``; the label and the temporal features are taken from row
    ``i + n_timesteps``.

    Args:
        data: DataFrame of model input features.
        target_data: DataFrame of target columns, row-aligned with ``data``.
        current_temporal_features: Columns of ``data`` describing the
            forecast step.
        n_timesteps: Window length.

    Returns:
        Tuple of NumPy arrays ``(X, y, current_time_features)``. All three are
        empty when ``len(data) <= n_timesteps``.
    """
    # Convert once up front: selecting columns inside the loop copied the
    # sub-frame on every iteration.
    feature_values = data.to_numpy()
    target_values = target_data.to_numpy()
    temporal_values = data[current_temporal_features].to_numpy()

    window_starts = range(len(data) - n_timesteps)
    X = [feature_values[i:i + n_timesteps] for i in window_starts]
    y = [target_values[i + n_timesteps] for i in window_starts]
    current_time_features = [temporal_values[i + n_timesteps] for i in window_starts]
    return np.array(X), np.array(y), np.array(current_time_features)

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=True):
    """Window the data, split it into train/test, and convert to float32 tensors.

    Args:
        df: Preprocessed DataFrame.
        features: Input feature column names.
        target: Target column names.
        current_temporal_features: Columns describing the forecast step.
        n_timesteps: Window length.
        test_size: Fraction of samples held out for testing.
        shuffle: Shuffle samples before splitting (the default, as before).
            Pass ``False`` for a chronological split: windows overlap, so a
            shuffled split places near-identical windows in both train and
            test.

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)``. The temporal tensors get an extra axis at
        dim 1 so they can be concatenated with a one-step context.
    """
    X, y, current_temporal = create_sequences(df[features], df[target], current_temporal_features, n_timesteps)
    split_arrays = train_test_split(
        X, y, current_temporal, test_size=test_size, random_state=42, shuffle=shuffle)

    def to_tensor(array):
        return torch.tensor(array, dtype=torch.float32)

    # Convert to PyTorch tensors
    X_train, X_test, y_train, y_test = (to_tensor(a) for a in split_arrays[:4])
    current_temporal_train, current_temporal_test = (
        to_tensor(a).unsqueeze(1) for a in split_arrays[4:]
    )

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def train_autoencoder(autoencoder, train_loader, n_epochs, ae_optimizer, ae_criterion):
    """Train ``autoencoder`` to reconstruct its inputs.

    Each batch's first element is used as both input and reconstruction
    target. Gradients are clipped to a norm of 1.0, and the mean batch loss is
    printed every tenth epoch.
    """
    for epoch_number in range(1, n_epochs + 1):
        autoencoder.train()
        running_loss = 0
        for batch in train_loader:
            inputs = batch[0]
            ae_optimizer.zero_grad()
            _, reconstruction = autoencoder(inputs)
            batch_loss = ae_criterion(reconstruction, inputs)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), max_norm=1.0)
            ae_optimizer.step()
            running_loss += batch_loss.item()
        if epoch_number % 10 != 0:
            continue
        mean_loss = running_loss / len(train_loader)
        print(f'Autoencoder Epoch [{epoch_number}/{n_epochs}], Loss: {mean_loss:.4f}')

def encode_data(autoencoder, data):
    """Return the encoder output for ``data`` without tracking gradients.

    Puts ``autoencoder`` in eval mode as a side effect.
    """
    autoencoder.eval()
    with torch.no_grad():
        # The encoder sub-module returns a single tensor. Only the full
        # autoencoder returns an (encoded, decoded) pair, so there is nothing
        # to unpack here.
        encoded_data = autoencoder.encoder(data)
    return encoded_data

def train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler=None):
    """Train the forecasting model.

    Each batch is ``(inputs, targets, temporal_features)``. Gradients are
    clipped to a norm of 1.0, the scheduler (if given) is stepped once per
    epoch, and the mean batch loss is printed every tenth epoch.
    """
    for epoch_number in range(1, n_epochs + 1):
        model.train()
        running_loss = 0
        for inputs, targets, temporal in train_loader:
            optimizer.zero_grad()
            predictions = model(inputs, temporal)[0]
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += batch_loss.item()
        if scheduler:
            scheduler.step()
        if epoch_number % 10 != 0:
            continue
        mean_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {mean_loss:.4f}')

def evaluate_lstm(model, test_loader, scaler_target):
    """Run the model over every test batch and return forecasts on the original scale.

    Args:
        model: Callable returning ``(forecast, attention_weights)`` for
            ``(inputs, temporal_features)``.
        test_loader: Iterable of ``(inputs, targets, temporal_features)``
            batches; targets are ignored.
        scaler_target: Fitted scaler exposing ``inverse_transform``.

    Returns:
        Tuple ``(forecast_original_scale, attention_weights_array)``, both
        covering all samples of ``test_loader`` in iteration order.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    forecast_list = []
    attention_weights_list = []
    with torch.no_grad():
        for X_batch, _, current_temporal_batch in test_loader:
            forecast, attention_weights = model(X_batch, current_temporal_batch)
            # Keep every batch's forecast: previously only the last batch
            # survived the loop and was returned.
            forecast_list.append(forecast.cpu().numpy())
            attention_weights_list.append(attention_weights.cpu().numpy())

    if not forecast_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Convert lists to numpy arrays
    forecast_array = np.concatenate(forecast_list, axis=0)
    attention_weights_array = np.concatenate(attention_weights_list, axis=0)

    # Inverse-transform the predictions to the original scale
    forecast_original_scale = scaler_target.inverse_transform(forecast_array)
    return forecast_original_scale, attention_weights_array

def visualize_attention_weights(attention_weights_array, sample_index=0):
    """Show the attention weights of one sample as an annotated single-row heatmap."""
    # Reshape the selected sample to one row so the heatmap has one cell per step.
    single_row = attention_weights_array[sample_index].reshape(1, -1)
    plt.figure(figsize=(10, 6))
    sns.heatmap(single_row, cmap='viridis', annot=True)
    plt.title('Attention Weights for Sample Index {}'.format(sample_index))
    plt.xlabel('Input Sequence Index')
    plt.ylabel('Attention Weight')
    plt.show()

def main(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim, lstm_hidden_size, lstm_num_layers, lstm_dropout, lstm_learning_rate, lstm_n_epochs, ae_n_epochs, batch_size):
    """Run the full pipeline: preprocess, train the autoencoder, train the LSTM, evaluate.

    Args:
        file_path: CSV file with the time series.
        features: Input feature column names.
        target: Target column names.
        current_temporal_features: Columns describing the forecast step.
        n_timesteps: Window length fed to the encoder.
        encoding_dim: Size of the autoencoder code.
        hidden_dim: Size of the autoencoder hidden layer.
        lstm_hidden_size: Hidden size of the encoder/decoder LSTMs.
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_dropout: Dropout between stacked LSTM layers.
        lstm_learning_rate: Adam learning rate for the LSTM model.
        lstm_n_epochs: Training epochs for the LSTM model.
        ae_n_epochs: Training epochs for the autoencoder.
        batch_size: Batch size for both training loops and evaluation.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Flatten (samples, timesteps, features) into one row per timestep for the
    # autoencoder. The per-sample targets cannot be concatenated here: they have
    # one row per sample, not per timestep, so torch.cat raised a size mismatch.
    # They would also feed the value being forecast into the encoder input.
    combined_train = X_train.reshape(-1, X_train.size(2))
    combined_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_dataset = TensorDataset(combined_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # The autoencoder input width is the number of per-timestep features
    input_dim = combined_train.size(1)

    # Initialize the autoencoder
    autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dim)

    # Define the optimizer and loss function for autoencoder
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
    ae_criterion = nn.MSELoss()

    # Train the autoencoder
    train_autoencoder(autoencoder, train_loader, ae_n_epochs, ae_optimizer, ae_criterion)

    # Encode the training and test data
    combined_train_encoded = encode_data(autoencoder, combined_train)
    combined_test_encoded = encode_data(autoencoder, combined_test)

    # Reshape the encoded features back to the original sequence shape
    encoded_train = combined_train_encoded.view(X_train.size(0), X_train.size(1), -1)
    encoded_test = combined_test_encoded.view(X_test.size(0), X_test.size(1), -1)

    # Create DataLoader for LSTM training
    train_dataset = TensorDataset(encoded_train, y_train, current_temporal_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataset = TensorDataset(encoded_test, y_test, current_temporal_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize model
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    model = LSTMEncoderDecoderWithAttention(input_size, lstm_hidden_size, output_size, lstm_num_layers, lstm_dropout)
    optimizer = optim.Adam(model.parameters(), lr=lstm_learning_rate)
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Train the LSTM model
    train_lstm(model, train_loader, lstm_n_epochs, optimizer, criterion, scheduler)

    # Evaluate the LSTM model
    forecast_original_scale, attention_weights_array = evaluate_lstm(model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Pipeline configuration
file_path = 'time_series_data.csv'
target = ['rate_level_1', 'rate_level_2']
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Model inputs: the target columns followed by the temporal features.
features = target + current_temporal_features
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128
lstm_hidden_size = 128
lstm_num_layers = 2
lstm_dropout = 0.3
lstm_learning_rate = 0.001
lstm_n_epochs = 50
ae_n_epochs = 50
batch_size = 64

# Run the main function
main(
    file_path=file_path,
    features=features,
    target=target,
    current_temporal_features=current_temporal_features,
    n_timesteps=n_timesteps,
    encoding_dim=encoding_dim,
    hidden_dim=hidden_dim,
    lstm_hidden_size=lstm_hidden_size,
    lstm_num_layers=lstm_num_layers,
    lstm_dropout=lstm_dropout,
    lstm_learning_rate=lstm_learning_rate,
    lstm_n_epochs=lstm_n_epochs,
    ae_n_epochs=ae_n_epochs,
    batch_size=batch_size,
)

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin

class PyTorchLSTMWrapper(BaseEstimator, RegressorMixin):
    """scikit-learn style estimator around ``LSTMEncoderDecoderWithAttention``.

    All constructor arguments are stored unchanged as attributes, so
    ``get_params`` / ``set_params`` / ``clone`` work. The network, optimizer,
    loss, and scheduler are (re)built from the current parameters at the start
    of every ``fit`` call, so values changed through ``set_params`` take
    effect.

    NOTE: ``fit`` and ``predict`` need the extra ``current_temporal`` input, so
    callers that invoke ``predict(X)`` with a single argument (for example
    scikit-learn's built-in scorers) cannot drive this estimator directly.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout, learning_rate, n_epochs, batch_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        # Built eagerly as well so that `.model` exists before fit().
        self._build_model()

    def _build_model(self):
        """Create a fresh network, optimizer, loss, and scheduler from current params."""
        self.model = LSTMEncoderDecoderWithAttention(
            self.input_size, self.hidden_size, self.output_size, self.num_layers, self.dropout)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)

    @staticmethod
    def _as_float_tensor(values):
        """Accept tensors or array-likes and return a float32 tensor."""
        return torch.as_tensor(values, dtype=torch.float32)

    def fit(self, X, y, current_temporal):
        """Train a freshly initialised model on ``(X, current_temporal) -> y``.

        Returns:
            ``self``, following the scikit-learn convention.
        """
        # Rebuild so hyperparameters changed via set_params() are honoured and
        # repeated fit() calls start from scratch.
        self._build_model()
        dataset = TensorDataset(
            self._as_float_tensor(X),
            self._as_float_tensor(y),
            self._as_float_tensor(current_temporal),
        )
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        train_lstm(self.model, loader, self.n_epochs, self.optimizer, self.criterion, self.scheduler)
        return self

    def predict(self, X, current_temporal):
        """Return predictions for ``X`` as a NumPy array, in input order."""
        dataset = TensorDataset(self._as_float_tensor(X), self._as_float_tensor(current_temporal))
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False)
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for X_batch, current_temporal_batch in loader:
                forecast, _ = self.model(X_batch, current_temporal_batch)
                predictions.append(forecast.cpu().numpy())
        return np.concatenate(predictions, axis=0)

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter values explored by the grid search (every combination is tried).
param_grid = dict(
    hidden_size=[64, 128],
    num_layers=[1, 2],
    dropout=[0.2, 0.3],
    learning_rate=[0.001, 0.01],
    n_epochs=[30, 50],
    batch_size=[32, 64],
)

In [None]:
def _cross_validated_neg_mse(params, X, y, current_temporal, n_splits=3):
    """Return the mean negative MSE of one hyperparameter set over K unshuffled folds."""
    from sklearn.model_selection import KFold

    fold_scores = []
    for fit_idx, val_idx in KFold(n_splits=n_splits).split(np.arange(len(X))):
        fit_idx = torch.as_tensor(fit_idx, dtype=torch.long)
        val_idx = torch.as_tensor(val_idx, dtype=torch.long)
        # A fresh estimator per fold so no weights carry over between folds.
        candidate = PyTorchLSTMWrapper(**params)
        candidate.fit(X[fit_idx], y[fit_idx], current_temporal[fit_idx])
        predicted = candidate.predict(X[val_idx], current_temporal[val_idx])
        residual = predicted - y[val_idx].cpu().numpy()
        fold_scores.append(-float(np.mean(residual ** 2)))
    return float(np.mean(fold_scores))


def main_with_grid_search(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim):
    """Run the pipeline with an exhaustive hyperparameter search for the LSTM model.

    The search reads the module-level ``param_grid``, scores each combination
    with 3-fold cross-validated negative MSE, refits the best combination on
    the whole training set, and evaluates it on the held-out test set.

    Args:
        file_path: CSV file with the time series.
        features: Input feature column names.
        target: Target column names.
        current_temporal_features: Columns describing the forecast step.
        n_timesteps: Window length fed to the encoder.
        encoding_dim: Size of the autoencoder code.
        hidden_dim: Autoencoder hidden size; also the default LSTM hidden size.
    """
    from sklearn.model_selection import ParameterGrid

    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Flatten (samples, timesteps, features) into one row per timestep for the
    # autoencoder. The per-sample targets cannot be concatenated here: they have
    # one row per sample, not per timestep, so torch.cat raised a size mismatch.
    combined_train = X_train.reshape(-1, X_train.size(2))
    combined_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_dataset = TensorDataset(combined_train)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # The autoencoder input width is the number of per-timestep features
    input_dim = combined_train.size(1)

    # Initialize the autoencoder
    autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dim)

    # Define the optimizer and loss function for autoencoder
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
    ae_criterion = nn.MSELoss()

    # Train the autoencoder
    train_autoencoder(autoencoder, train_loader, 50, ae_optimizer, ae_criterion)

    # Encode the training and test data
    combined_train_encoded = encode_data(autoencoder, combined_train)
    combined_test_encoded = encode_data(autoencoder, combined_test)

    # Reshape the encoded features back to the original sequence shape
    encoded_train = combined_train_encoded.view(X_train.size(0), X_train.size(1), -1)
    encoded_test = combined_test_encoded.view(X_test.size(0), X_test.size(1), -1)

    # Defaults for every wrapper argument; grid entries override them
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    base_params = dict(
        input_size=input_size, hidden_size=hidden_dim, output_size=output_size,
        num_layers=2, dropout=0.3, learning_rate=0.001, n_epochs=50, batch_size=64)

    # Explicit grid search. GridSearchCV cannot be used here: its fit() takes no
    # third positional argument and its scorer calls predict(X) without the
    # temporal features that the wrapper requires.
    best_score, best_params = None, None
    for overrides in ParameterGrid(param_grid):
        params = {**base_params, **overrides}
        score = _cross_validated_neg_mse(params, encoded_train, y_train, current_temporal_train)
        print(f"[grid search] params={overrides}, mean neg MSE={score:.6f}")
        if best_params is None or score > best_score:
            best_score, best_params = score, params

    # Refit the best configuration on the whole training set
    best_model = PyTorchLSTMWrapper(**best_params)
    best_model.fit(encoded_train, y_train, current_temporal_train)

    # Evaluate the best model
    forecast_original_scale, attention_weights_array = evaluate_lstm(best_model.model, DataLoader(TensorDataset(encoded_test, y_test, current_temporal_test), batch_size=64, shuffle=False), scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Pipeline configuration
file_path = 'time_series_data.csv'
target = ['rate_level_1', 'rate_level_2']
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Model inputs: the target columns followed by the temporal features.
features = target + current_temporal_features
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# Run the main function with GridSearchCV
main_with_grid_search(
    file_path=file_path,
    features=features,
    target=target,
    current_temporal_features=current_temporal_features,
    n_timesteps=n_timesteps,
    encoding_dim=encoding_dim,
    hidden_dim=hidden_dim,
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset

# Define Attention Mechanism
class Attention(nn.Module):
    """Additive attention scoring every encoder step against one decoder state.

    Each step is scored as ``v . tanh(W [hidden; encoder_output])`` and the
    scores are normalised with a softmax over the sequence axis.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # Projects the concatenated (hidden, encoder_output) pair to hidden_size.
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        # Scoring vector; this random draw is overwritten by init_weights().
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-init the projection, zero its bias, draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, seq_len).

        The indexing below treats ``hidden`` as (batch, hidden_size) and
        ``encoder_outputs`` as (batch, seq_len, hidden_size).
        """
        batch_size, n_steps = encoder_outputs.size(0), encoder_outputs.size(1)
        # Broadcast the single decoder state across all encoder steps.
        tiled_hidden = hidden.unsqueeze(1).expand(-1, n_steps, -1)
        paired = torch.cat((tiled_hidden, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired)).transpose(1, 2)
        # One copy of v per batch element so bmm yields one score per step.
        scorer = self.v.view(1, 1, -1).expand(batch_size, 1, -1)
        scores = torch.bmm(scorer, energy).squeeze(1)
        return torch.softmax(scores, dim=1)

# Define Encoder
class LSTMEncoder(nn.Module):
    """Stacked LSTM that encodes an input sequence.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM applies dropout only between stacked layers. With a single
        # layer a non-zero value has no effect and only emits a UserWarning,
        # so pass 0 in that case.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=effective_dropout,
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise all LSTM weight matrices and zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

    def forward(self, x):
        """Run the LSTM over ``x`` (batch-first).

        Returns:
            Tuple ``(outputs, hidden, cell)``: the per-step outputs and the
            final hidden and cell states as returned by ``nn.LSTM``.
        """
        outputs, (hidden, cell) = self.lstm(x)
        return outputs, hidden, cell

# Define Decoder with Attention
class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder fed with an attention context and temporal features.

    Args:
        hidden_size: Hidden size shared with the encoder.
        output_size: Number of predicted target values.
        num_layers: Number of stacked LSTM layers (must match the encoder,
            because the encoder's final states initialise this LSTM).
        dropout: Dropout probability applied between stacked layers.
        n_temporal_features: Width of ``current_temporal_features`` appended to
            the context vector. Defaults to 4, the value that used to be
            hard-coded.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, n_temporal_features=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.n_temporal_features = n_temporal_features
        self.attention = Attention(hidden_size)
        # Dropout between layers is meaningless for one layer and only
        # triggers a UserWarning from nn.LSTM.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            hidden_size + n_temporal_features,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=effective_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise LSTM and output weights; zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, hidden, cell, encoder_outputs, current_temporal_features):
        """Produce one prediction step.

        Args:
            hidden: Encoder final hidden states; the last layer's state drives
                the attention query.
            cell: Encoder final cell states.
            encoder_outputs: Per-step encoder outputs (batch-first).
            current_temporal_features: Tensor concatenated to the context on
                dim 2, so it must be (batch, 1, n_temporal_features).

        Returns:
            Tuple ``(prediction, hidden, cell, attention_weights)`` where the
            attention weights are (batch, seq_len).
        """
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        attention_weights = attention_weights.unsqueeze(1)
        # Weighted sum of encoder outputs: (batch, 1, hidden_size).
        context = torch.bmm(attention_weights, encoder_outputs)
        decoder_input = torch.cat([context, current_temporal_features], dim=2)
        output, (hidden, cell) = self.lstm(decoder_input, (hidden, cell))
        prediction = self.fc(output).squeeze(1)
        return prediction, hidden, cell, attention_weights.squeeze(1)

# Define full Encoder-Decoder Model
class LSTMEncoderDecoderWithAttention(nn.Module):
    """Encoder-decoder pair producing a one-step forecast plus attention weights."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(input_size, hidden_size, num_layers, dropout)
        self.decoder = LSTMDecoderWithAttention(hidden_size, output_size, num_layers, dropout)

    def forward(self, encoder_input, current_temporal_features):
        """Encode the input sequence, decode one step, and return
        ``(prediction, attention_weights)``; the decoder's updated states are
        discarded.
        """
        encoder_outputs, hidden, cell = self.encoder(encoder_input)
        decoded = self.decoder(hidden, cell, encoder_outputs, current_temporal_features)
        prediction, attention_weights = decoded[0], decoded[3]
        return prediction, attention_weights

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Two-layer fully connected autoencoder.

    The encoder ends in a ReLU (non-negative codes) and the decoder ends in a
    Sigmoid (reconstructions in [0, 1]).
    """

    def __init__(self, input_dim, encoding_dim, hidden_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, encoding_dim),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every Linear weight and zero every Linear bias."""
        linear_layers = [
            module
            for module in (*self.encoder, *self.decoder)
            if isinstance(module, nn.Linear)
        ]
        for linear in linear_layers:
            nn.init.xavier_uniform_(linear.weight)
            nn.init.constant_(linear.bias, 0)

    def forward(self, x):
        """Return ``(encoded, decoded)`` for the input batch ``x``."""
        latent = self.encoder(x)
        return latent, self.decoder(latent)

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV, fill gaps, and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Unused here; kept for interface
            compatibility with callers.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler, for inverse-transforming predictions.
    """
    df = pd.read_csv(file_path)

    # Forward-fill then back-fill gaps. DataFrame.ffill()/bfill() replace the
    # deprecated fillna(method=...) spelling.
    if df.isnull().to_numpy().any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit the target scaler on the RAW target values before the feature columns
    # are overwritten. Target columns may also be listed in `features`; fitting
    # after they were normalised would make this scaler an identity map and
    # inverse_transform() would never restore the original scale.
    scaled_target = scaler_target.fit_transform(df[target])

    # Normalize feature columns
    df[features] = scaler_features.fit_transform(df[features])

    # Normalize target columns (min-max scaling is per column, so columns shared
    # with `features` receive the same values either way).
    df[target] = scaled_target

    return df, scaler_target

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Build sliding-window samples for one-step-ahead forecasting.

    For every window start ``i`` the sample is rows ``i .. i+n_timesteps-1`` of
    ``data``; the label and the temporal features are taken from row
    ``i + n_timesteps``.

    Args:
        data: DataFrame of model input features.
        target_data: DataFrame of target columns, row-aligned with ``data``.
        current_temporal_features: Columns of ``data`` describing the
            forecast step.
        n_timesteps: Window length.

    Returns:
        Tuple of NumPy arrays ``(X, y, current_time_features)``. All three are
        empty when ``len(data) <= n_timesteps``.
    """
    # Convert once up front: selecting columns inside the loop copied the
    # sub-frame on every iteration.
    feature_values = data.to_numpy()
    target_values = target_data.to_numpy()
    temporal_values = data[current_temporal_features].to_numpy()

    window_starts = range(len(data) - n_timesteps)
    X = [feature_values[i:i + n_timesteps] for i in window_starts]
    y = [target_values[i + n_timesteps] for i in window_starts]
    current_time_features = [temporal_values[i + n_timesteps] for i in window_starts]
    return np.array(X), np.array(y), np.array(current_time_features)

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=True):
    """Window the data, split it into train/test, and convert to float32 tensors.

    Args:
        df: Preprocessed DataFrame.
        features: Input feature column names.
        target: Target column names.
        current_temporal_features: Columns describing the forecast step.
        n_timesteps: Window length.
        test_size: Fraction of samples held out for testing.
        shuffle: Shuffle samples before splitting (the default, as before).
            Pass ``False`` for a chronological split: windows overlap, so a
            shuffled split places near-identical windows in both train and
            test.

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)``. The temporal tensors get an extra axis at
        dim 1 so they can be concatenated with a one-step context.
    """
    X, y, current_temporal = create_sequences(df[features], df[target], current_temporal_features, n_timesteps)
    split_arrays = train_test_split(
        X, y, current_temporal, test_size=test_size, random_state=42, shuffle=shuffle)

    def to_tensor(array):
        return torch.tensor(array, dtype=torch.float32)

    # Convert to PyTorch tensors
    X_train, X_test, y_train, y_test = (to_tensor(a) for a in split_arrays[:4])
    current_temporal_train, current_temporal_test = (
        to_tensor(a).unsqueeze(1) for a in split_arrays[4:]
    )

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def train_autoencoder(autoencoder, train_loader, n_epochs, ae_optimizer, ae_criterion):
    """Train ``autoencoder`` to reconstruct its inputs.

    Each batch's first element is used as both input and reconstruction
    target. Gradients are clipped to a norm of 1.0, and the mean batch loss is
    printed every tenth epoch.
    """
    for epoch_number in range(1, n_epochs + 1):
        autoencoder.train()
        running_loss = 0
        for batch in train_loader:
            inputs = batch[0]
            ae_optimizer.zero_grad()
            _, reconstruction = autoencoder(inputs)
            batch_loss = ae_criterion(reconstruction, inputs)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), max_norm=1.0)
            ae_optimizer.step()
            running_loss += batch_loss.item()
        if epoch_number % 10 != 0:
            continue
        mean_loss = running_loss / len(train_loader)
        print(f'Autoencoder Epoch [{epoch_number}/{n_epochs}], Loss: {mean_loss:.4f}')

def encode_data(autoencoder, data):
    """Return the encoder output for ``data`` without tracking gradients.

    Puts ``autoencoder`` in eval mode as a side effect.
    """
    autoencoder.eval()
    with torch.no_grad():
        # The encoder sub-module returns a single tensor. Only the full
        # autoencoder returns an (encoded, decoded) pair, so there is nothing
        # to unpack here.
        encoded_data = autoencoder.encoder(data)
    return encoded_data

def train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler=None):
    """Train the forecasting model.

    Each batch is ``(inputs, targets, temporal_features)``. Gradients are
    clipped to a norm of 1.0, the scheduler (if given) is stepped once per
    epoch, and the mean batch loss is printed every tenth epoch.
    """
    for epoch_number in range(1, n_epochs + 1):
        model.train()
        running_loss = 0
        for inputs, targets, temporal in train_loader:
            optimizer.zero_grad()
            predictions = model(inputs, temporal)[0]
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += batch_loss.item()
        if scheduler:
            scheduler.step()
        if epoch_number % 10 != 0:
            continue
        mean_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {mean_loss:.4f}')

def evaluate_lstm(model, test_loader, scaler_target):
    """Run the model over every test batch and return forecasts on the original scale.

    Args:
        model: Callable returning ``(forecast, attention_weights)`` for
            ``(inputs, temporal_features)``.
        test_loader: Iterable of ``(inputs, targets, temporal_features)``
            batches; targets are ignored.
        scaler_target: Fitted scaler exposing ``inverse_transform``.

    Returns:
        Tuple ``(forecast_original_scale, attention_weights_array)``, both
        covering all samples of ``test_loader`` in iteration order.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    forecast_list = []
    attention_weights_list = []
    with torch.no_grad():
        for X_batch, _, current_temporal_batch in test_loader:
            forecast, attention_weights = model(X_batch, current_temporal_batch)
            # Keep every batch's forecast: previously only the last batch
            # survived the loop and was returned.
            forecast_list.append(forecast.cpu().numpy())
            attention_weights_list.append(attention_weights.cpu().numpy())

    if not forecast_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Convert lists to numpy arrays
    forecast_array = np.concatenate(forecast_list, axis=0)
    attention_weights_array = np.concatenate(attention_weights_list, axis=0)

    # Inverse-transform the predictions to the original scale
    forecast_original_scale = scaler_target.inverse_transform(forecast_array)
    return forecast_original_scale, attention_weights_array

def visualize_attention_weights(attention_weights_array, sample_index=0):
    """Show the attention weights of one sample as an annotated single-row heatmap."""
    # Reshape the selected sample to one row so the heatmap has one cell per step.
    single_row = attention_weights_array[sample_index].reshape(1, -1)
    plt.figure(figsize=(10, 6))
    sns.heatmap(single_row, cmap='viridis', annot=True)
    plt.title('Attention Weights for Sample Index {}'.format(sample_index))
    plt.xlabel('Input Sequence Index')
    plt.ylabel('Attention Weight')
    plt.show()

def _cross_validated_neg_mse(params, X, y, current_temporal, n_splits=3):
    """Return the mean negative MSE of one hyperparameter set over K unshuffled folds."""
    from sklearn.model_selection import KFold

    fold_scores = []
    for fit_idx, val_idx in KFold(n_splits=n_splits).split(np.arange(len(X))):
        fit_idx = torch.as_tensor(fit_idx, dtype=torch.long)
        val_idx = torch.as_tensor(val_idx, dtype=torch.long)
        # A fresh estimator per fold so no weights carry over between folds.
        candidate = PyTorchLSTMWrapper(**params)
        candidate.fit(X[fit_idx], y[fit_idx], current_temporal[fit_idx])
        predicted = candidate.predict(X[val_idx], current_temporal[val_idx])
        residual = predicted - y[val_idx].cpu().numpy()
        fold_scores.append(-float(np.mean(residual ** 2)))
    return float(np.mean(fold_scores))


def main_with_grid_search(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim):
    """Run the pipeline with an exhaustive hyperparameter search for the LSTM model.

    Each combination of the grid defined below is scored with 3-fold
    cross-validated negative MSE; the best one is refitted on the whole
    training set and evaluated on the held-out test set.

    Args:
        file_path: CSV file with the time series.
        features: Input feature column names.
        target: Target column names.
        current_temporal_features: Columns describing the forecast step.
        n_timesteps: Window length fed to the encoder.
        encoding_dim: Size of the autoencoder code.
        hidden_dim: Autoencoder hidden size; also the default LSTM hidden size.
    """
    from sklearn.model_selection import ParameterGrid

    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Flatten (samples, timesteps, features) into one row per timestep for the
    # autoencoder. The per-sample targets cannot be concatenated here: they have
    # one row per sample, not per timestep, so torch.cat raised a size mismatch.
    combined_train = X_train.reshape(-1, X_train.size(2))
    combined_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_dataset = TensorDataset(combined_train)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # The autoencoder input width is the number of per-timestep features
    input_dim = combined_train.size(1)

    # Initialize the autoencoder
    autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dim)

    # Define the optimizer and loss function for autoencoder
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)  # L2 regularization
    ae_criterion = nn.MSELoss()

    # Train the autoencoder
    train_autoencoder(autoencoder, train_loader, 50, ae_optimizer, ae_criterion)

    # Encode the training and test data
    combined_train_encoded = encode_data(autoencoder, combined_train)
    combined_test_encoded = encode_data(autoencoder, combined_test)

    # Reshape the encoded features back to the original sequence shape
    encoded_train = combined_train_encoded.view(X_train.size(0), X_train.size(1), -1)
    encoded_test = combined_test_encoded.view(X_test.size(0), X_test.size(1), -1)

    # Defaults for every wrapper argument; grid entries override them
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    base_params = dict(
        input_size=input_size, hidden_size=hidden_dim, output_size=output_size,
        num_layers=2, dropout=0.3, learning_rate=0.001, n_epochs=50, batch_size=64)

    # Define the hyperparameter grid
    param_grid = {
        'hidden_size': [64, 128],
        'num_layers': [1, 2],
        'dropout': [0.2, 0.3],
        'learning_rate': [0.001, 0.01],
        'n_epochs': [30, 50],
        'batch_size': [32, 64]
    }

    # Explicit grid search. GridSearchCV cannot be used here: its fit() takes no
    # third positional argument and its scorer calls predict(X) without the
    # temporal features that the wrapper requires.
    best_score, best_params = None, None
    for overrides in ParameterGrid(param_grid):
        params = {**base_params, **overrides}
        score = _cross_validated_neg_mse(params, encoded_train, y_train, current_temporal_train)
        print(f"[grid search] params={overrides}, mean neg MSE={score:.6f}")
        if best_params is None or score > best_score:
            best_score, best_params = score, params

    # Refit the best configuration on the whole training set
    best_model = PyTorchLSTMWrapper(**best_params)
    best_model.fit(encoded_train, y_train, current_temporal_train)

    # Evaluate the best model
    forecast_original_scale, attention_weights_array = evaluate_lstm(best_model.model, DataLoader(TensorDataset(encoded_test, y_test, current_temporal_test), batch_size=64, shuffle=False), scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Pipeline configuration
file_path = 'time_series_data.csv'
target = ['rate_level_1', 'rate_level_2']
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Model inputs: the target columns followed by the temporal features.
features = target + current_temporal_features
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# Run the main function with GridSearchCV
main_with_grid_search(
    file_path=file_path,
    features=features,
    target=target,
    current_temporal_features=current_temporal_features,
    n_timesteps=n_timesteps,
    encoding_dim=encoding_dim,
    hidden_dim=hidden_dim,
)

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV file, fill gaps and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Accepted for signature compatibility;
            not used by this function.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler (for inverse-transforming predictions).

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Fill gaps forward first, then backward for leading NaNs.
    # (`fillna(method=...)` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit BOTH scalers on the raw values before writing anything back.
    # When a target column is also a feature column, scaling the features
    # first would leave the target scaler fitted on data already in [0, 1],
    # turning its inverse_transform into a no-op.
    scaled_features = scaler_features.fit_transform(df[features])
    scaled_target = scaler_target.fit_transform(df[target])
    df[features] = scaled_features
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import joblib

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV file, fill gaps and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Accepted for signature compatibility;
            not used by this function.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler (for inverse-transforming predictions).

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Fill gaps forward first, then backward for leading NaNs.
    # (`fillna(method=...)` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit BOTH scalers on the raw values before writing anything back.
    # When a target column is also a feature column, scaling the features
    # first would leave the target scaler fitted on data already in [0, 1],
    # turning its inverse_transform into a no-op.
    scaled_features = scaler_features.fit_transform(df[features])
    scaled_target = scaler_target.fit_transform(df[target])
    df[features] = scaled_features
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

def feature_selection(df, features, target, n_top_features=5):
    """Return the names of the most important features per a random forest.

    A 100-tree ``RandomForestRegressor`` (``random_state=42``) is fitted on
    ``df[features]`` against ``df[target]``; features are ranked by the
    forest's importances, highest first.

    Args:
        df: DataFrame holding the feature and target columns.
        features: Candidate feature column names.
        target: Target column name(s).
        n_top_features: How many of the top-ranked names to return.

    Returns:
        List of feature names, most important first.
    """
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(df[features], df[target])

    # Pair every feature with its importance, then rank in descending order.
    ranking = pd.DataFrame({
        'Feature': features,
        'Importance': forest.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    return ranking['Feature'].head(n_top_features).tolist()

def main_with_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim, n_top_features=5):
    """Run the forecasting pipeline with random-forest feature selection.

    Loads and scales the CSV, keeps the ``n_top_features`` highest-ranked
    features, compresses every timestep with an autoencoder, grid-searches
    the LSTM wrapper on the encoded sequences, then prints test predictions
    next to the true values and plots the attention weights.

    Args:
        file_path: CSV file to read.
        features: Candidate feature column names.
        target: Target column names.
        current_temporal_features: Columns fed to the decoder as the
            current-step inputs.
        n_timesteps: Length of each input window.
        encoding_dim: Size of the autoencoder bottleneck.
        hidden_dim: Hidden size used for the autoencoder and the initial
            LSTM wrapper.
        n_top_features: Number of features kept by the selection step.

    Returns:
        None. Results are printed and plotted.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Feature selection
    top_features = feature_selection(df, features, target, n_top_features)
    print(f"Selected top {n_top_features} features: {top_features}")

    # Prepare data with selected features
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, top_features, target, current_temporal_features, n_timesteps)

    # Flatten each (window, timestep) pair into one autoencoder row.
    # The next-step targets are intentionally NOT concatenated here: they have
    # one row per window while the flattened inputs have n_timesteps rows per
    # window (torch.cat would raise a size mismatch), and feeding the label
    # into the encoder would leak it into the model inputs.
    flat_train = X_train.reshape(-1, X_train.size(2))
    flat_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_loader = DataLoader(TensorDataset(flat_train), batch_size=64, shuffle=True)

    # Initialize and train the autoencoder (Adam with L2 regularization)
    autoencoder = Autoencoder(flat_train.size(1), encoding_dim, hidden_dim)
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)
    ae_criterion = nn.MSELoss()
    train_autoencoder(autoencoder, train_loader, 50, ae_optimizer, ae_criterion)

    # Encode, then restore the (windows, timesteps, encoded_features) layout
    encoded_train = encode_data(autoencoder, flat_train).reshape(X_train.size(0), X_train.size(1), -1)
    encoded_test = encode_data(autoencoder, flat_test).reshape(X_test.size(0), X_test.size(1), -1)

    # Initialize the PyTorch model wrapper
    # NOTE(review): the trailing positional arguments presumably map to
    # num_layers, dropout, learning_rate, n_epochs, batch_size - confirm
    # against PyTorchLSTMWrapper, which is defined outside this function.
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    model = PyTorchLSTMWrapper(input_size, hidden_dim, output_size, 2, 0.3, 0.001, 50, 64)

    # Define the hyperparameter grid
    param_grid = {
        'hidden_size': [64, 128],
        'num_layers': [1, 2],
        'dropout': [0.2, 0.3],
        'learning_rate': [0.001, 0.01],
        'n_epochs': [30, 50],
        'batch_size': [32, 64]
    }

    # Perform GridSearchCV
    # NOTE(review): how the third positional argument of fit() is interpreted
    # depends on the installed scikit-learn version and on the wrapper's own
    # fit signature - confirm it reaches the estimator as intended.
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', verbose=2)
    grid_search.fit(encoded_train, y_train, current_temporal_train)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Evaluate the best model
    test_loader = DataLoader(TensorDataset(encoded_test, y_test, current_temporal_test), batch_size=64, shuffle=False)
    forecast_original_scale, attention_weights_array = evaluate_lstm(best_model.model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Define parameters
# CSV file handed to the pipeline as its data source.
file_path = 'time_series_data.csv'
# Candidate input columns. Note that both target columns also appear in this list.
features = ['rate_level_1', 'rate_level_2', 'days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Columns to forecast.
target = ['rate_level_1', 'rate_level_2']
# Columns passed separately as the "current temporal" inputs; all of them are also in `features`.
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Passed through as the n_timesteps / encoding_dim / hidden_dim arguments of the call below.
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# Run the main function with feature selection and GridSearchCV
# n_top_features is not passed, so the function's default of 5 applies.
main_with_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim)

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression

def univariate_feature_selection(X, y, k=5):
    """Select the ``k`` best columns of ``X`` by univariate F-test.

    ``f_regression`` only accepts a one-dimensional target. When ``y`` has
    several columns, the F-scores are computed per target and averaged, so
    multi-target inputs no longer raise.

    Args:
        X: Feature DataFrame; its ``columns`` are used for the result.
        y: Target values, one-dimensional or ``(n_samples, n_targets)``.
        k: Number of features to keep.

    Returns:
        pandas Index with the selected column names.
    """
    import numpy as np

    def _mean_f_score(features, targets):
        # One-dimensional targets keep the stock behaviour (scores and p-values).
        targets = np.asarray(targets)
        if targets.ndim == 1:
            return f_regression(features, targets)
        per_target = [
            f_regression(features, targets[:, column])[0]
            for column in range(targets.shape[1])
        ]
        return np.mean(per_target, axis=0)

    selector = SelectKBest(score_func=_mean_f_score, k=k)
    selector.fit(X, y)
    selected_features = X.columns[selector.get_support()]
    return selected_features

# Example usage
# NOTE(review): `df` is not assigned at module level in any cell visible here
# (it is only a local inside the pipeline functions) - presumably it comes from
# an earlier notebook cell; confirm before running this cell on its own.
X = df[features]
y = df[target]
# k is left at its default of 5.
selected_features = univariate_feature_selection(X, y)
print("Selected features:", selected_features)

In [None]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

def recursive_feature_elimination(X, y, n_features_to_select=5):
    """Pick features by recursive feature elimination with a linear model.

    Args:
        X: Feature DataFrame; its ``columns`` are used for the result.
        y: Target values passed to ``RFE.fit``.
        n_features_to_select: Number of features RFE should retain.

    Returns:
        pandas Index with the names of the retained columns.
    """
    eliminator = RFE(LinearRegression(), n_features_to_select=n_features_to_select)
    eliminator.fit(X, y)
    # support_ is the boolean mask of the columns RFE kept.
    return X.columns[eliminator.support_]

# Example usage
# Relies on the module-level X and y assigned in an earlier cell;
# n_features_to_select is left at its default of 5.
selected_features = recursive_feature_elimination(X, y)
print("Selected features:", selected_features)

In [None]:
from sklearn.decomposition import PCA

def apply_pca(X, n_components=5):
    """Project ``X`` onto its first principal components.

    Args:
        X: Data to fit the PCA on and to transform.
        n_components: Number of components to keep.

    Returns:
        Tuple ``(X_pca, pca)``: the transformed data and the fitted PCA object.
    """
    reducer = PCA(n_components=n_components)
    projected = reducer.fit_transform(X)
    return projected, reducer

# Example usage
# Relies on the module-level X assigned in an earlier cell; n_components is
# left at its default of 5.
X_pca, pca = apply_pca(X)
print("Explained variance ratio:", pca.explained_variance_ratio_)

In [None]:
from sklearn.linear_model import Lasso

def lasso_feature_selection(X, y, alpha=0.01):
    """Select the columns of ``X`` that receive a non-zero Lasso coefficient.

    With a multi-column ``y`` the fitted ``coef_`` is two-dimensional
    (one row per target), which cannot be used directly as a mask on
    ``X.columns``. A feature is kept when its coefficient is non-zero for
    at least one target.

    Args:
        X: Feature DataFrame; its ``columns`` are used for the result.
        y: Target values, one-dimensional or ``(n_samples, n_targets)``.
        alpha: Lasso regularization strength.

    Returns:
        pandas Index with the selected column names.
    """
    import numpy as np

    lasso = Lasso(alpha=alpha)
    lasso.fit(X, y)
    # Collapse (n_targets, n_features) or (n_features,) into one mask per feature.
    keep = np.any(np.atleast_2d(lasso.coef_) != 0, axis=0)
    selected_features = X.columns[keep]
    return selected_features

# Example usage
# Relies on the module-level X and y assigned in an earlier cell; alpha is
# left at its default of 0.01.
selected_features = lasso_feature_selection(X, y)
print("Selected features:", selected_features)

In [None]:
from sklearn.feature_selection import mutual_info_regression

def mutual_information_feature_selection(X, y, k=5):
    """Select the ``k`` columns of ``X`` with the highest mutual information.

    ``mutual_info_regression`` only accepts a one-dimensional target. When
    ``y`` has several columns the scores are estimated per target and
    averaged, so multi-target inputs no longer raise.

    Args:
        X: Feature DataFrame; its ``columns`` are used for the result.
        y: Target values, one-dimensional or ``(n_samples, n_targets)``.
        k: Number of features to keep.

    Returns:
        pandas Index with the ``k`` highest-scoring column names.
    """
    import numpy as np

    targets = np.asarray(y)
    if targets.ndim == 1:
        mi = mutual_info_regression(X, targets)
    else:
        mi = np.mean(
            [mutual_info_regression(X, targets[:, column]) for column in range(targets.shape[1])],
            axis=0,
        )
    mi_series = pd.Series(mi, index=X.columns)
    selected_features = mi_series.nlargest(k).index
    return selected_features

# Example usage
# Relies on the module-level X and y assigned in an earlier cell; k is left
# at its default of 5.
selected_features = mutual_information_feature_selection(X, y)
print("Selected features:", selected_features)

In [None]:
def main_with_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim, n_top_features=5):
    """Run the forecasting pipeline with univariate feature selection.

    Loads and scales the CSV, keeps the ``n_top_features`` best features
    according to ``univariate_feature_selection``, compresses every timestep
    with an autoencoder, grid-searches the LSTM wrapper on the encoded
    sequences, then prints test predictions next to the true values and
    plots the attention weights.

    Args:
        file_path: CSV file to read.
        features: Candidate feature column names.
        target: Target column names.
        current_temporal_features: Columns fed to the decoder as the
            current-step inputs.
        n_timesteps: Length of each input window.
        encoding_dim: Size of the autoencoder bottleneck.
        hidden_dim: Hidden size used for the autoencoder and the initial
            LSTM wrapper.
        n_top_features: Number of features kept by the selection step.

    Returns:
        None. Results are printed and plotted.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Feature selection using univariate selection
    X = df[features]
    y = df[target]
    selected_features = univariate_feature_selection(X, y, k=n_top_features)
    print(f"Selected top {n_top_features} features: {selected_features}")

    # Prepare data with selected features
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, selected_features, target, current_temporal_features, n_timesteps)

    # Flatten each (window, timestep) pair into one autoencoder row.
    # The next-step targets are intentionally NOT concatenated here: they have
    # one row per window while the flattened inputs have n_timesteps rows per
    # window (torch.cat would raise a size mismatch), and feeding the label
    # into the encoder would leak it into the model inputs.
    flat_train = X_train.reshape(-1, X_train.size(2))
    flat_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_loader = DataLoader(TensorDataset(flat_train), batch_size=64, shuffle=True)

    # Initialize and train the autoencoder (Adam with L2 regularization)
    autoencoder = Autoencoder(flat_train.size(1), encoding_dim, hidden_dim)
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)
    ae_criterion = nn.MSELoss()
    train_autoencoder(autoencoder, train_loader, 50, ae_optimizer, ae_criterion)

    # Encode, then restore the (windows, timesteps, encoded_features) layout
    encoded_train = encode_data(autoencoder, flat_train).reshape(X_train.size(0), X_train.size(1), -1)
    encoded_test = encode_data(autoencoder, flat_test).reshape(X_test.size(0), X_test.size(1), -1)

    # Initialize the PyTorch model wrapper
    # NOTE(review): the trailing positional arguments presumably map to
    # num_layers, dropout, learning_rate, n_epochs, batch_size - confirm
    # against PyTorchLSTMWrapper, which is defined outside this function.
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    model = PyTorchLSTMWrapper(input_size, hidden_dim, output_size, 2, 0.3, 0.001, 50, 64)

    # Define the hyperparameter grid
    param_grid = {
        'hidden_size': [64, 128],
        'num_layers': [1, 2],
        'dropout': [0.2, 0.3],
        'learning_rate': [0.001, 0.01],
        'n_epochs': [30, 50],
        'batch_size': [32, 64]
    }

    # Perform GridSearchCV
    # NOTE(review): how the third positional argument of fit() is interpreted
    # depends on the installed scikit-learn version and on the wrapper's own
    # fit signature - confirm it reaches the estimator as intended.
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', verbose=2)
    grid_search.fit(encoded_train, y_train, current_temporal_train)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Evaluate the best model
    test_loader = DataLoader(TensorDataset(encoded_test, y_test, current_temporal_test), batch_size=64, shuffle=False)
    forecast_original_scale, attention_weights_array = evaluate_lstm(best_model.model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Define parameters
# CSV file handed to the pipeline as its data source.
file_path = 'time_series_data.csv'
# Candidate input columns. Note that both target columns also appear in this list.
features = ['rate_level_1', 'rate_level_2', 'days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Columns to forecast.
target = ['rate_level_1', 'rate_level_2']
# Columns passed separately as the "current temporal" inputs; all of them are also in `features`.
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Passed through as the n_timesteps / encoding_dim / hidden_dim arguments of the call below.
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# Run the main function with feature selection and GridSearchCV
# n_top_features is not passed, so the function's default of 5 applies.
main_with_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    The encoder maps ``input_dim -> hidden_dim -> encoding_dim`` with ReLU
    after both linear layers; the decoder maps back
    ``encoding_dim -> hidden_dim -> input_dim`` and ends in a sigmoid.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dim):
        super().__init__()
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, encoding_dim),
            nn.ReLU(),
        ]
        decoder_layers = [
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize every linear weight and zero every linear bias."""
        for stack in (self.encoder, self.decoder):
            for module in stack:
                if not isinstance(module, nn.Linear):
                    continue
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return ``(encoded, decoded)`` for the input batch ``x``."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV file, fill gaps and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Accepted for signature compatibility;
            not used by this function.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler (for inverse-transforming predictions).

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Fill gaps forward first, then backward for leading NaNs.
    # (`fillna(method=...)` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit BOTH scalers on the raw values before writing anything back.
    # When a target column is also a feature column, scaling the features
    # first would leave the target scaler fitted on data already in [0, 1],
    # turning its inverse_transform into a no-op.
    scaled_features = scaler_features.fit_transform(df[features])
    scaled_target = scaler_target.fit_transform(df[target])
    df[features] = scaled_features
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=True):
    """Build sliding-window sequences, split them and convert them to tensors.

    Args:
        df: Preprocessed DataFrame holding all referenced columns.
        features: Columns that make up each input window.
        target: Columns to predict for the step following each window.
        current_temporal_features: Columns read at the predicted step.
            They no longer have to be part of ``features``.
        n_timesteps: Window length.
        test_size: Fraction of the sequences held out for testing.
        shuffle: Whether to shuffle before splitting (default, as before).
            Pass ``False`` for a chronological split; with shuffling,
            overlapping windows end up on both sides of the split.

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)`` as float32 tensors. The temporal tensors
        carry an extra axis at dim 1.
    """
    feature_cols = list(features)
    # create_sequences looks the temporal columns up in the frame it is given,
    # so append any that feature selection dropped (otherwise: KeyError) ...
    extra_temporal = [col for col in current_temporal_features if col not in feature_cols]
    X, y, current_temporal = create_sequences(
        df[feature_cols + extra_temporal], df[target], current_temporal_features, n_timesteps)
    if extra_temporal:
        # ... and cut them off again so X holds only the requested features.
        X = X[:, :, :len(feature_cols)]

    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = train_test_split(
        X, y, current_temporal, test_size=test_size, random_state=42, shuffle=shuffle)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32)
    # Add a length-1 axis at dim 1 to the temporal inputs.
    current_temporal_train = torch.tensor(current_temporal_train, dtype=torch.float32).unsqueeze(1)
    current_temporal_test = torch.tensor(current_temporal_test, dtype=torch.float32).unsqueeze(1)

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Cut sliding windows out of ``data`` with next-step targets.

    For every start row ``i`` the window is rows ``i .. i + n_timesteps - 1``
    of ``data``; the target and the temporal features are taken from row
    ``i + n_timesteps``.

    Args:
        data: Feature DataFrame (must contain ``current_temporal_features``).
        target_data: Target DataFrame aligned row-for-row with ``data``.
        current_temporal_features: Column names of ``data`` to read at the
            predicted step.
        n_timesteps: Window length.

    Returns:
        Tuple of NumPy arrays ``(X, y, current_time_features)`` with shapes
        ``(n, n_timesteps, n_features)``, ``(n, n_targets)`` and
        ``(n, n_temporal)``. When ``data`` is too short for a single window,
        ``n`` is 0 but the trailing dimensions are kept.

    Raises:
        ValueError: If ``n_timesteps`` is negative.
    """
    if n_timesteps < 0:
        raise ValueError(f"n_timesteps must be non-negative, got {n_timesteps}")

    # Convert to NumPy once instead of re-slicing DataFrames on every iteration.
    feature_values = data.values
    target_values = target_data.values
    temporal_values = data[current_temporal_features].values

    n_samples = max(len(data) - n_timesteps, 0)
    if n_samples == 0:
        X = np.empty((0, n_timesteps, feature_values.shape[1]), dtype=feature_values.dtype)
    else:
        X = np.stack([feature_values[start:start + n_timesteps] for start in range(n_samples)])

    # Row i + n_timesteps belongs to window i; copy so callers never alias the frame.
    stop = n_timesteps + n_samples
    y = target_values[n_timesteps:stop].copy()
    current_time_features = temporal_values[n_timesteps:stop].copy()
    return X, y, current_time_features

def train_autoencoder(autoencoder, train_loader, n_epochs, ae_optimizer, ae_criterion):
    """Train ``autoencoder`` to reconstruct the batches of ``train_loader``.

    Each batch's first element is used as both input and reconstruction
    target. Gradients are clipped to a norm of 1.0 before every optimizer
    step, and the mean epoch loss is printed every tenth epoch.

    Args:
        autoencoder: Module returning ``(encoded, decoded)`` from its forward.
        train_loader: Iterable of batches; only element 0 of each is used.
        n_epochs: Number of passes over ``train_loader``.
        ae_optimizer: Optimizer over the autoencoder's parameters.
        ae_criterion: Loss called as ``ae_criterion(decoded, inputs)``.
    """
    for epoch_number in range(1, n_epochs + 1):
        autoencoder.train()
        running_loss = 0
        for batch in train_loader:
            ae_optimizer.zero_grad()
            inputs = batch[0]
            _, reconstruction = autoencoder(inputs)
            batch_loss = ae_criterion(reconstruction, inputs)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), max_norm=1.0)
            ae_optimizer.step()
            running_loss += batch_loss.item()
        if epoch_number % 10 == 0:
            print(f'Autoencoder Epoch [{epoch_number}/{n_epochs}], Loss: {running_loss/len(train_loader):.4f}')

def encode_data(autoencoder, data):
    """Return the encoder output for ``data`` without tracking gradients.

    Puts ``autoencoder`` into eval mode and runs only its ``encoder``
    sub-module. The encoder returns a single tensor, so it must not be
    unpacked into two values (doing so raises, or silently splits rows when
    the batch happens to have exactly two of them).

    Args:
        autoencoder: Module exposing an ``encoder`` sub-module.
        data: Input tensor accepted by ``autoencoder.encoder``.

    Returns:
        The encoded tensor.
    """
    autoencoder.eval()
    with torch.no_grad():
        encoded_data = autoencoder.encoder(data)
    return encoded_data

def main_with_autoencoder_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim):
    """Run the forecasting pipeline with autoencoder-compressed features.

    Loads and scales the CSV, builds the sliding-window sequences,
    compresses every timestep with an autoencoder, grid-searches the LSTM
    wrapper on the encoded sequences, then prints test predictions next to
    the true values and plots the attention weights.

    Args:
        file_path: CSV file to read.
        features: Feature column names.
        target: Target column names.
        current_temporal_features: Columns fed to the decoder as the
            current-step inputs.
        n_timesteps: Length of each input window.
        encoding_dim: Size of the autoencoder bottleneck.
        hidden_dim: Hidden size used for the autoencoder and the initial
            LSTM wrapper.

    Returns:
        None. Results are printed and plotted.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Flatten each (window, timestep) pair into one autoencoder row.
    # The next-step targets are intentionally NOT concatenated here: they have
    # one row per window while the flattened inputs have n_timesteps rows per
    # window (torch.cat would raise a size mismatch), and feeding the label
    # into the encoder would leak it into the model inputs.
    flat_train = X_train.reshape(-1, X_train.size(2))
    flat_test = X_test.reshape(-1, X_test.size(2))

    # Create DataLoader for autoencoder training
    train_loader = DataLoader(TensorDataset(flat_train), batch_size=64, shuffle=True)

    # Initialize and train the autoencoder (Adam with L2 regularization)
    autoencoder = Autoencoder(flat_train.size(1), encoding_dim, hidden_dim)
    ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)
    ae_criterion = nn.MSELoss()
    train_autoencoder(autoencoder, train_loader, 50, ae_optimizer, ae_criterion)

    # Encode, then restore the (windows, timesteps, encoded_features) layout
    encoded_train = encode_data(autoencoder, flat_train).reshape(X_train.size(0), X_train.size(1), -1)
    encoded_test = encode_data(autoencoder, flat_test).reshape(X_test.size(0), X_test.size(1), -1)

    # The encoded sequences are the input features of the LSTM model.
    # NOTE(review): the trailing positional arguments presumably map to
    # num_layers, dropout, learning_rate, n_epochs, batch_size - confirm
    # against PyTorchLSTMWrapper, which is defined outside this function.
    input_size = encoded_train.size(2)  # Encoded feature size
    output_size = y_train.size(1)  # Number of targets
    model = PyTorchLSTMWrapper(input_size, hidden_dim, output_size, 2, 0.3, 0.001, 50, 64)

    # Define the hyperparameter grid
    param_grid = {
        'hidden_size': [64, 128],
        'num_layers': [1, 2],
        'dropout': [0.2, 0.3],
        'learning_rate': [0.001, 0.01],
        'n_epochs': [30, 50],
        'batch_size': [32, 64]
    }

    # Perform GridSearchCV
    # NOTE(review): how the third positional argument of fit() is interpreted
    # depends on the installed scikit-learn version and on the wrapper's own
    # fit signature - confirm it reaches the estimator as intended.
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', verbose=2)
    grid_search.fit(encoded_train, y_train, current_temporal_train)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Evaluate the best model
    test_loader = DataLoader(TensorDataset(encoded_test, y_test, current_temporal_test), batch_size=64, shuffle=False)
    forecast_original_scale, attention_weights_array = evaluate_lstm(best_model.model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Define parameters
# CSV file handed to the pipeline as its data source.
file_path = 'time_series_data.csv'
# Input columns. Note that both target columns also appear in this list.
features = ['rate_level_1', 'rate_level_2', 'days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Columns to forecast.
target = ['rate_level_1', 'rate_level_2']
# Columns passed separately as the "current temporal" inputs; all of them are also in `features`.
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Window length, autoencoder bottleneck size and hidden size passed to the call below.
n_timesteps = 12
encoding_dim = 20
hidden_dim = 128

# Run the main function with autoencoder feature selection and GridSearchCV
# NOTE: this call executes the whole pipeline as soon as the cell runs.
main_with_autoencoder_feature_selection(file_path, features, target, current_temporal_features, n_timesteps, encoding_dim, hidden_dim)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib

# Define LSTM Encoder-Decoder Model with Attention
class Attention(nn.Module):
    """Additive attention over encoder outputs.

    Scores every encoder timestep against a query hidden state with
    ``v . tanh(W [hidden; encoder_output])`` and normalizes the scores with
    a softmax over the sequence axis.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize the projection, zero its bias, draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape ``(batch, seq_len)``.

        Args:
            hidden: Query state, ``(batch, hidden_size)``.
            encoder_outputs: ``(batch, seq_len, hidden_size)``.
        """
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)

        # Pair the query with every timestep; torch.cat materializes the broadcast view.
        tiled_hidden = hidden.unsqueeze(1).expand(-1, seq_len, -1)
        paired = torch.cat((tiled_hidden, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired))

        # (batch, 1, hidden) x (batch, hidden, seq_len) -> (batch, 1, seq_len)
        scorer = self.v.repeat(batch_size, 1).unsqueeze(1)
        scores = torch.bmm(scorer, energy.transpose(1, 2)).squeeze(1)
        return torch.softmax(scores, dim=1)

class LSTMEncoder(nn.Module):
    """Batch-first (multi-layer) LSTM encoder.

    Args:
        input_size: Number of features per timestep.
        hidden_size: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(LSTMEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM applies dropout only BETWEEN stacked layers. With a single
        # layer a non-zero value has no effect and only triggers a UserWarning,
        # so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=inter_layer_dropout)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize all LSTM weight matrices and zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

    def forward(self, x):
        """Encode ``x`` of shape ``(batch, seq_len, input_size)``.

        Returns:
            ``(outputs, hidden, cell)``: the per-timestep outputs and the
            final hidden and cell states as returned by ``nn.LSTM``.
        """
        outputs, (hidden, cell) = self.lstm(x)
        return outputs, hidden, cell

class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder that attends over the encoder outputs.

    Args:
        hidden_size: Hidden size shared with the encoder.
        output_size: Number of predicted values.
        num_layers: Number of stacked LSTM layers (must match the encoder,
            whose final states seed this LSTM).
        dropout: Dropout probability between stacked layers.
        temporal_size: Number of current-step temporal features concatenated
            to the attention context. Defaults to 4, the previously
            hard-coded width.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, temporal_size=4):
        super(LSTMDecoderWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.temporal_size = temporal_size
        self.attention = Attention(hidden_size)
        # nn.LSTM applies dropout only BETWEEN stacked layers; with a single
        # layer a non-zero value has no effect and only triggers a UserWarning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(hidden_size + temporal_size, hidden_size, num_layers, batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialize LSTM and output weights; zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, hidden, cell, encoder_outputs, current_temporal_features):
        """Run one decoding step.

        Args:
            hidden: Encoder final hidden states, ``(num_layers, batch, hidden)``.
            cell: Encoder final cell states, same shape as ``hidden``.
            encoder_outputs: ``(batch, seq_len, hidden)``.
            current_temporal_features: ``(batch, 1, temporal_size)``.

        Returns:
            ``(prediction, hidden, cell, attention_weights)`` where
            ``prediction`` is ``(batch, output_size)`` and
            ``attention_weights`` is ``(batch, seq_len)``.
        """
        # Query the attention with the top layer's hidden state.
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        attention_weights = attention_weights.unsqueeze(1)
        # Weighted sum of the encoder outputs: (batch, 1, hidden).
        context = torch.bmm(attention_weights, encoder_outputs)
        decoder_input = torch.cat([context, current_temporal_features], dim=2)
        output, (hidden, cell) = self.lstm(decoder_input, (hidden, cell))
        prediction = self.fc(output).squeeze(1)
        return prediction, hidden, cell, attention_weights.squeeze(1)

class LSTMEncoderDecoderWithAttention(nn.Module):
    """Encoder-decoder forecaster: LSTM encoder plus attention decoder.

    The encoder's final hidden and cell states seed the decoder, which
    performs a single attention-weighted decoding step.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(input_size, hidden_size, num_layers, dropout)
        self.decoder = LSTMDecoderWithAttention(hidden_size, output_size, num_layers, dropout)

    def forward(self, encoder_input, current_temporal_features):
        """Return ``(prediction, attention_weights)`` for one batch."""
        encoder_outputs, hidden, cell = self.encoder(encoder_input)
        # The decoder also returns its updated states, which are not needed here.
        prediction, _hidden, _cell, attention_weights = self.decoder(
            hidden, cell, encoder_outputs, current_temporal_features
        )
        return prediction, attention_weights

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load a CSV file, fill gaps and min-max scale feature and target columns.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Accepted for signature compatibility;
            not used by this function.

    Returns:
        Tuple ``(df, scaler_target)``: the scaled DataFrame and the fitted
        target scaler (for inverse-transforming predictions).

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Fill gaps forward first, then backward for leading NaNs.
    # (`fillna(method=...)` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit BOTH scalers on the raw values before writing anything back.
    # When a target column is also a feature column, scaling the features
    # first would leave the target scaler fitted on data already in [0, 1],
    # turning its inverse_transform into a no-op.
    scaled_features = scaler_features.fit_transform(df[features])
    scaled_target = scaler_target.fit_transform(df[target])
    df[features] = scaled_features
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=True):
    """Build sliding-window sequences, split them and convert them to tensors.

    Args:
        df: Preprocessed DataFrame holding all referenced columns.
        features: Columns that make up each input window.
        target: Columns to predict for the step following each window.
        current_temporal_features: Columns read at the predicted step.
            They no longer have to be part of ``features``.
        n_timesteps: Window length.
        test_size: Fraction of the sequences held out for testing.
        shuffle: Whether to shuffle before splitting (default, as before).
            Pass ``False`` for a chronological split; with shuffling,
            overlapping windows end up on both sides of the split.

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)`` as float32 tensors. The temporal tensors
        carry an extra axis at dim 1.
    """
    feature_cols = list(features)
    # create_sequences looks the temporal columns up in the frame it is given,
    # so append any that feature selection dropped (otherwise: KeyError) ...
    extra_temporal = [col for col in current_temporal_features if col not in feature_cols]
    X, y, current_temporal = create_sequences(
        df[feature_cols + extra_temporal], df[target], current_temporal_features, n_timesteps)
    if extra_temporal:
        # ... and cut them off again so X holds only the requested features.
        X = X[:, :, :len(feature_cols)]

    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = train_test_split(
        X, y, current_temporal, test_size=test_size, random_state=42, shuffle=shuffle)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32)
    # Add a length-1 axis at dim 1 to the temporal inputs.
    current_temporal_train = torch.tensor(current_temporal_train, dtype=torch.float32).unsqueeze(1)
    current_temporal_test = torch.tensor(current_temporal_test, dtype=torch.float32).unsqueeze(1)

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Cut sliding windows out of ``data`` with next-step targets.

    For every start row ``i`` the window is rows ``i .. i + n_timesteps - 1``
    of ``data``; the target and the temporal features are taken from row
    ``i + n_timesteps``.

    Args:
        data: Feature DataFrame (must contain ``current_temporal_features``).
        target_data: Target DataFrame aligned row-for-row with ``data``.
        current_temporal_features: Column names of ``data`` to read at the
            predicted step.
        n_timesteps: Window length.

    Returns:
        Tuple of NumPy arrays ``(X, y, current_time_features)`` with shapes
        ``(n, n_timesteps, n_features)``, ``(n, n_targets)`` and
        ``(n, n_temporal)``. When ``data`` is too short for a single window,
        ``n`` is 0 but the trailing dimensions are kept.

    Raises:
        ValueError: If ``n_timesteps`` is negative.
    """
    if n_timesteps < 0:
        raise ValueError(f"n_timesteps must be non-negative, got {n_timesteps}")

    # Convert to NumPy once instead of re-slicing DataFrames on every iteration.
    feature_values = data.values
    target_values = target_data.values
    temporal_values = data[current_temporal_features].values

    n_samples = max(len(data) - n_timesteps, 0)
    if n_samples == 0:
        X = np.empty((0, n_timesteps, feature_values.shape[1]), dtype=feature_values.dtype)
    else:
        X = np.stack([feature_values[start:start + n_timesteps] for start in range(n_samples)])

    # Row i + n_timesteps belongs to window i; copy so callers never alias the frame.
    stop = n_timesteps + n_samples
    y = target_values[n_timesteps:stop].copy()
    current_time_features = temporal_values[n_timesteps:stop].copy()
    return X, y, current_time_features

def train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler=None):
    """Train ``model`` on ``train_loader`` for ``n_epochs`` epochs.

    Every batch is a triple ``(inputs, targets, temporal_inputs)``; the model
    is called as ``model(inputs, temporal_inputs)`` and only the first
    element of its return value is used for the loss. Gradients are clipped
    to a norm of 1.0 before each optimizer step. The scheduler, when given,
    is stepped once per epoch, and the mean epoch loss is printed every
    tenth epoch.

    Args:
        model: Module returning ``(prediction, extra)`` from its forward.
        train_loader: Iterable of 3-element batches.
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss called as ``criterion(prediction, targets)``.
        scheduler: Optional learning-rate scheduler.
    """
    for epoch_number in range(1, n_epochs + 1):
        model.train()
        epoch_loss = 0
        for inputs, targets, temporal_inputs in train_loader:
            optimizer.zero_grad()
            predictions, _ = model(inputs, temporal_inputs)
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += batch_loss.item()
        if scheduler:
            scheduler.step()
        if epoch_number % 10 == 0:
            print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

def evaluate_lstm(model, test_loader, scaler_target):
    """Predict every batch of ``test_loader`` and return original-scale forecasts.

    Forecasts from ALL batches are collected and concatenated; previously
    only the last batch's forecast survived the loop, so the result did not
    line up with the full test set.

    Args:
        model: Module called as ``model(inputs, temporal_inputs)`` and
            returning ``(forecast, attention_weights)``.
        test_loader: Iterable of ``(inputs, targets, temporal_inputs)``
            batches; the targets are ignored.
        scaler_target: Object whose ``inverse_transform`` maps scaled
            forecasts back to the original scale.

    Returns:
        Tuple ``(forecast_original_scale, attention_weights_array)``, both
        NumPy arrays with one row per sample in ``test_loader``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    forecast_list = []
    attention_weights_list = []
    with torch.no_grad():
        for X_batch, _, current_temporal_batch in test_loader:
            forecast, attention_weights = model(X_batch, current_temporal_batch)
            forecast_list.append(forecast.cpu().numpy())
            attention_weights_list.append(attention_weights.cpu().numpy())

    if not forecast_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Stitch the per-batch results back together in loader order.
    attention_weights_array = np.concatenate(attention_weights_list, axis=0)
    forecast_scaled = np.concatenate(forecast_list, axis=0)

    # Inverse-transform the predictions to the original scale
    forecast_original_scale = scaler_target.inverse_transform(forecast_scaled)
    return forecast_original_scale, attention_weights_array

def visualize_attention_weights(attention_weights_array, sample_index=0):
    """Plot one sample's attention weights as a single-row annotated heatmap.

    Args:
        attention_weights_array: Array indexed by sample along its first axis.
        sample_index: Which sample to plot.
    """
    # Flatten the chosen sample into one heatmap row.
    heatmap_row = attention_weights_array[sample_index].reshape(1, -1)

    plt.figure(figsize=(10, 6))
    sns.heatmap(heatmap_row, cmap='viridis', annot=True)
    plt.title('Attention Weights for Sample Index {}'.format(sample_index))
    plt.xlabel('Input Sequence Index')
    plt.ylabel('Attention Weight')
    plt.show()

def main_with_pretrained_model(file_path, features, target, current_temporal_features, n_timesteps, hidden_dim, num_layers, dropout, learning_rate, n_epochs, batch_size, pretrained_weights_path='pretrained_model.pth'):
    """Fine-tune a pre-trained encoder-decoder LSTM and evaluate it.

    Loads and scales the CSV, builds the sliding-window sequences, restores
    the model weights from ``pretrained_weights_path``, fine-tunes with Adam
    and a step learning-rate schedule, then prints test predictions next to
    the true values and plots the attention weights.

    Args:
        file_path: CSV file to read.
        features: Feature column names.
        target: Target column names.
        current_temporal_features: Columns fed to the decoder as the
            current-step inputs.
        n_timesteps: Length of each input window.
        hidden_dim: LSTM hidden size (must match the checkpoint).
        num_layers: Number of LSTM layers (must match the checkpoint).
        dropout: Dropout probability between LSTM layers.
        learning_rate: Adam learning rate for fine-tuning.
        n_epochs: Number of fine-tuning epochs.
        batch_size: Batch size for training and evaluation.
        pretrained_weights_path: Checkpoint holding the model's state dict.
            Defaults to the previously hard-coded ``'pretrained_model.pth'``.

    Returns:
        None. Results are printed and plotted.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Initialize the model
    input_size = X_train.size(2)
    output_size = y_train.size(1)
    model = LSTMEncoderDecoderWithAttention(input_size, hidden_dim, output_size, num_layers, dropout)

    # Load pre-trained weights
    # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
    # machine; the model is never moved off the CPU in this function.
    # NOTE: torch.load unpickles the file - only load checkpoints from a
    # trusted source.
    state_dict = torch.load(pretrained_weights_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Fine-tune the model; the learning rate is cut by 10x every 10 epochs.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    train_dataset = TensorDataset(X_train, y_train, current_temporal_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler)

    # Evaluate the model
    test_dataset = TensorDataset(X_test, y_test, current_temporal_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    forecast_original_scale, attention_weights_array = evaluate_lstm(model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Define parameters
# CSV file handed to the pipeline as its data source.
file_path = 'time_series_data.csv'
# Input columns. Note that both target columns also appear in this list.
features = ['rate_level_1', 'rate_level_2', 'days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Columns to forecast.
target = ['rate_level_1', 'rate_level_2']
# Columns passed separately as the "current temporal" inputs; all of them are also in `features`.
current_temporal_features = ['days_to_end_of_month', 'days_to_ECB_meeting', 'days_to_Fed_meeting', 'ois_sofr_rate']
# Window length of each input sequence.
n_timesteps = 12
# Model architecture: LSTM hidden size, number of stacked layers, dropout.
hidden_dim = 128
num_layers = 2
dropout = 0.3
# Fine-tuning settings: Adam learning rate, epoch count, batch size.
learning_rate = 0.001
n_epochs = 50
batch_size = 64

# Run the main function with pre-trained model
# NOTE: the function loads its weights from 'pretrained_model.pth', so that file must exist.
main_with_pretrained_model(file_path, features, target, current_temporal_features, n_timesteps, hidden_dim, num_layers, dropout, learning_rate, n_epochs, batch_size)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib

# Define LSTM Encoder-Decoder Model with Attention
class Attention(nn.Module):
    """Additive attention that scores every encoder time step against a query state.

    Each encoder output is concatenated with the query, passed through a linear
    layer followed by ``tanh``, and reduced to a scalar score by a dot product
    with the learned vector ``v``. Scores are softmax-normalised over time.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise the projection, zero its bias, draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, seq_len) that sum to 1 per row.

        Args:
            hidden: Query state, (batch, hidden_size).
            encoder_outputs: Encoder outputs, (batch, seq_len, hidden_size).
        """
        batch_size = encoder_outputs.size(0)
        n_steps = encoder_outputs.size(1)

        # Tile the query along the time axis so it can be paired with every step.
        query = hidden.unsqueeze(1).repeat(1, n_steps, 1)
        paired = torch.cat((query, encoder_outputs), dim=2)
        projected = torch.tanh(self.attn(paired))

        # (batch, 1, hidden) @ (batch, hidden, seq_len) -> (batch, 1, seq_len)
        v_rows = self.v.repeat(batch_size, 1).unsqueeze(1)
        scores = torch.bmm(v_rows, projected.transpose(1, 2)).squeeze(1)
        return torch.softmax(scores, dim=1)

class LSTMEncoder(nn.Module):
    """Stacked, batch-first LSTM that encodes the input window."""

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every LSTM weight matrix and zero every bias."""
        for param_name, tensor in self.lstm.named_parameters():
            if 'weight' in param_name:
                nn.init.xavier_uniform_(tensor)
                continue
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)

    def forward(self, x):
        """Return ``(outputs, hidden, cell)`` produced by the LSTM for ``x``."""
        outputs, final_state = self.lstm(x)
        hidden, cell = final_state
        return outputs, hidden, cell

class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder driven by an attention context vector.

    The decoder attends over the encoder outputs using the top-layer hidden
    state, concatenates the resulting context with the current temporal
    features, runs one LSTM step and projects the result to ``output_size``.

    Args:
        hidden_size: Width of the encoder/decoder hidden state.
        output_size: Number of values predicted per sample.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
        temporal_size: Number of current temporal features appended to the
            context vector. Defaults to 4, the value that used to be
            hard-coded, so existing callers and checkpoints are unaffected.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, temporal_size=4):
        super(LSTMDecoderWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.temporal_size = temporal_size
        self.attention = Attention(hidden_size)
        # LSTM input = attention context (hidden_size) + temporal features.
        self.lstm = nn.LSTM(hidden_size + temporal_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise LSTM and output weights; zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, hidden, cell, encoder_outputs, current_temporal_features):
        """Run one decoding step.

        Args:
            hidden: Encoder hidden state, (num_layers, batch, hidden_size).
            cell: Encoder cell state, same shape as ``hidden``.
            encoder_outputs: Encoder outputs, (batch, seq_len, hidden_size).
            current_temporal_features: (batch, 1, temporal_size) features for
                the step being predicted.

        Returns:
            ``(prediction, hidden, cell, attention_weights)`` where
            ``attention_weights`` has shape (batch, seq_len).

        Raises:
            ValueError: If the last dimension of ``current_temporal_features``
                does not equal ``temporal_size``.
        """
        if current_temporal_features.size(-1) != self.temporal_size:
            raise ValueError(
                f"expected {self.temporal_size} temporal features, "
                f"got {current_temporal_features.size(-1)}"
            )
        # Query the attention with the top layer's hidden state.
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        attention_weights = attention_weights.unsqueeze(1)
        # Weighted sum of encoder outputs: (batch, 1, hidden_size).
        context = torch.bmm(attention_weights, encoder_outputs)
        decoder_input = torch.cat([context, current_temporal_features], dim=2)
        output, (hidden, cell) = self.lstm(decoder_input, (hidden, cell))
        prediction = self.fc(output).squeeze(1)
        return prediction, hidden, cell, attention_weights.squeeze(1)

class LSTMEncoderDecoderWithAttention(nn.Module):
    """Encoder-decoder wrapper: encode the window, then decode one step with attention."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(input_size, hidden_size, num_layers, dropout)
        self.decoder = LSTMDecoderWithAttention(hidden_size, output_size, num_layers, dropout)

    def forward(self, encoder_input, current_temporal_features):
        """Return ``(prediction, attention_weights)`` for a batch of input windows."""
        encoder_outputs, hidden, cell = self.encoder(encoder_input)
        decoded = self.decoder(hidden, cell, encoder_outputs, current_temporal_features)
        # The decoder also returns its updated hidden/cell states; they are not needed here.
        prediction, attention_weights = decoded[0], decoded[3]
        return prediction, attention_weights

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load the CSV, fill gaps, min-max scale feature/target columns and persist the scalers.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Unused here; kept for interface compatibility.

    Returns:
        ``(df, scaler_target)``: the scaled DataFrame and the fitted target
        scaler, which maps scaled predictions back to the original units.

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Forward-fill, then back-fill so leading gaps are covered as well.
    # (``fillna(method=...)`` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit the target scaler on the RAW target values *before* the feature
    # columns are overwritten. The target columns may also appear in
    # ``features``; fitting afterwards would see data already squeezed into
    # [0, 1], learn an identity mapping, and ``inverse_transform`` would never
    # return values on the original scale.
    scaled_target = scaler_target.fit_transform(df[target])

    # Normalize feature columns
    df[features] = scaler_features.fit_transform(df[features])

    # Normalize target columns
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=False):
    """Build sliding-window samples and split them into train/test tensors.

    Args:
        df: Pre-processed DataFrame containing the feature and target columns.
        features: Column names that form each input window.
        target: Column names to predict.
        current_temporal_features: Column names read at the prediction step.
        n_timesteps: Length of each input window.
        test_size: Fraction of samples held out for testing.
        shuffle: When False (default) the split is chronological and the last
            ``test_size`` fraction of windows forms the test set. Consecutive
            windows overlap, so a shuffled split places near-duplicates of
            test windows (and observations from the future) in the training
            set and inflates test accuracy. Pass True for the former random
            split (seeded with 42).

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)`` as float32 tensors; the temporal tensors get
        an extra axis at position 1.
    """
    X, y, current_temporal = create_sequences(df[features], df[target], current_temporal_features, n_timesteps)
    splits = train_test_split(
        X, y, current_temporal,
        test_size=test_size,
        shuffle=shuffle,
        random_state=42 if shuffle else None,
    )

    # Convert to PyTorch tensors
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = (
        torch.tensor(part, dtype=torch.float32) for part in splits
    )
    # The decoder consumes the temporal features as a one-step sequence.
    current_temporal_train = current_temporal_train.unsqueeze(1)
    current_temporal_test = current_temporal_test.unsqueeze(1)

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Slice a feature frame into sliding windows with next-step targets.

    For every start position ``i`` the sample consists of rows
    ``i .. i + n_timesteps - 1`` of ``data``; the target and the current
    temporal features are both read from row ``i + n_timesteps``.

    Args:
        data: DataFrame of input features.
        target_data: DataFrame of target columns, row-aligned with ``data``.
        current_temporal_features: Column names of ``data`` to read at the
            prediction step.
        n_timesteps: Window length.

    Returns:
        ``(X, y, current_time_features)`` as NumPy arrays.
    """
    # Convert once up front. Selecting ``data[current_temporal_features]``
    # inside the loop copied the whole frame on every iteration.
    feature_values = data.to_numpy()
    target_values = target_data.to_numpy()
    temporal_values = data[current_temporal_features].to_numpy()

    X, y, current_time_features = [], [], []
    for start in range(len(data) - n_timesteps):
        step = start + n_timesteps
        X.append(feature_values[start:step])
        y.append(target_values[step])
        current_time_features.append(temporal_values[step])
    return np.array(X), np.array(y), np.array(current_time_features)

def train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler=None):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Each batch is a ``(X, y, current_temporal)`` triple. Gradients are clipped
    to a global norm of 1.0 before every optimizer step, the optional scheduler
    is stepped once per epoch, and the mean batch loss is printed every tenth
    epoch.
    """
    for epoch_idx in range(n_epochs):
        model.train()
        running_loss = 0
        for X_batch, y_batch, current_temporal_batch in train_loader:
            optimizer.zero_grad()
            # The model also returns attention weights, which training ignores.
            predictions, _ = model(X_batch, current_temporal_batch)
            batch_loss = criterion(predictions, y_batch)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += batch_loss.item()

        if scheduler:
            scheduler.step()

        epoch_number = epoch_idx + 1
        if epoch_number % 10 != 0:
            continue
        print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {running_loss/len(train_loader):.4f}')

def evaluate_lstm(model, test_loader, scaler_target):
    """Run the model over every test batch and return forecasts and attention weights.

    Args:
        model: Callable module returning ``(forecast, attention_weights)`` for
            ``(X_batch, current_temporal_batch)``.
        test_loader: Iterable of ``(X, y, current_temporal)`` batches.
        scaler_target: Fitted scaler whose ``inverse_transform`` maps scaled
            predictions back to the original units.

    Returns:
        ``(forecast_original_scale, attention_weights_array)``: one row per
        test sample, in loader order.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    forecast_list = []
    attention_weights_list = []
    with torch.no_grad():
        for X_batch, _, current_temporal_batch in test_loader:
            forecast, attention_weights = model(X_batch, current_temporal_batch)
            # Collect EVERY batch. Previously only the final batch's forecast
            # survived the loop, so predictions did not line up with the full
            # test set or with the attention weights.
            forecast_list.append(forecast.cpu().numpy())
            attention_weights_list.append(attention_weights.cpu().numpy())

    if not forecast_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Convert lists to numpy arrays
    forecast_array = np.concatenate(forecast_list, axis=0)
    attention_weights_array = np.concatenate(attention_weights_list, axis=0)

    # Inverse-transform the predictions to the original scale
    forecast_original_scale = scaler_target.inverse_transform(forecast_array)
    return forecast_original_scale, attention_weights_array

def visualize_attention_weights(attention_weights_array, sample_index=0):
    """Show one sample's attention weights as a single-row annotated heatmap.

    Args:
        attention_weights_array: Array indexed by sample along the first axis.
        sample_index: Index of the sample to plot.
    """
    # Imported at function scope: this cell's import block does not bring in
    # the plotting libraries, so the function would otherwise fail with a
    # NameError unless an earlier cell had imported them.
    import matplotlib.pyplot as plt
    import seaborn as sns

    attention_weights_sample = attention_weights_array[sample_index]
    plt.figure(figsize=(10, 6))
    # Reshape to one row so the input positions run along the x-axis.
    sns.heatmap(attention_weights_sample.reshape(1, -1), cmap='viridis', annot=True)
    plt.title('Attention Weights for Sample Index {}'.format(sample_index))
    plt.xlabel('Input Sequence Index')
    plt.ylabel('Attention Weight')
    plt.show()

def main_with_fine_tuning(file_path, features, target, current_temporal_features, n_timesteps, hidden_dim, num_layers, dropout, learning_rate, n_epochs, batch_size, pretrained_model_path):
    """Fine-tune a saved encoder-decoder on the CSV data and report test forecasts.

    Steps: load and scale the data, build train/test tensors, construct the
    model, load the weights stored at ``pretrained_model_path``, continue
    training with Adam + StepLR, then print predictions against true values and
    plot the attention weights of the first test sample.

    Args:
        file_path: CSV file with the time series.
        features: Input column names.
        target: Target column names.
        current_temporal_features: Columns read at the prediction step.
        n_timesteps: Input window length.
        hidden_dim: LSTM hidden size.
        num_layers: Number of LSTM layers.
        dropout: LSTM dropout probability.
        learning_rate: Adam learning rate.
        n_epochs: Number of fine-tuning epochs.
        batch_size: Mini-batch size for both loaders.
        pretrained_model_path: Path of the saved ``state_dict``.
    """
    # Load and preprocess data
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)

    # Prepare data
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = prepare_data(df, features, target, current_temporal_features, n_timesteps)

    # Initialize the model
    input_size = X_train.size(2)
    output_size = y_train.size(1)
    model = LSTMEncoderDecoderWithAttention(input_size, hidden_dim, output_size, num_layers, dropout)

    # Load pre-trained weights.
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    # map_location keeps the tensors on CPU, where this pipeline builds its data,
    # so a checkpoint written on a GPU machine still loads here.
    state_dict = torch.load(pretrained_model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Fine-tune the model
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    train_dataset = TensorDataset(X_train, y_train, current_temporal_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler)

    # Evaluate the model
    test_dataset = TensorDataset(X_test, y_test, current_temporal_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    forecast_original_scale, attention_weights_array = evaluate_lstm(model, test_loader, scaler_target)

    # Compare predictions to the actual values
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Visualize attention weights for a specific sample
    visualize_attention_weights(attention_weights_array)

# Data configuration and hyper-parameters for the fine-tuning run
file_path = 'time_series_data.csv'
features = [
    'rate_level_1',
    'rate_level_2',
    'days_to_end_of_month',
    'days_to_ECB_meeting',
    'days_to_Fed_meeting',
    'ois_sofr_rate',
]
target = ['rate_level_1', 'rate_level_2']
current_temporal_features = [
    'days_to_end_of_month',
    'days_to_ECB_meeting',
    'days_to_Fed_meeting',
    'ois_sofr_rate',
]
n_timesteps = 12
hidden_dim, num_layers, dropout = 128, 2, 0.3
learning_rate, n_epochs, batch_size = 0.001, 50, 64
pretrained_model_path = 'pretrained_model.pth'

# Load the saved weights, fine-tune and evaluate
main_with_fine_tuning(
    file_path=file_path,
    features=features,
    target=target,
    current_temporal_features=current_temporal_features,
    n_timesteps=n_timesteps,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    dropout=dropout,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    batch_size=batch_size,
    pretrained_model_path=pretrained_model_path,
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib

# Define LSTM Encoder-Decoder Model with Attention
class Attention(nn.Module):
    """Additive attention that scores every encoder time step against a query state.

    Each encoder output is concatenated with the query, passed through a linear
    layer followed by ``tanh``, and reduced to a scalar score by a dot product
    with the learned vector ``v``. Scores are softmax-normalised over time.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(2 * hidden_size, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise the projection, zero its bias, draw ``v`` from U(-0.1, 0.1)."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.constant_(self.attn.bias, 0)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, seq_len) that sum to 1 per row.

        Args:
            hidden: Query state, (batch, hidden_size).
            encoder_outputs: Encoder outputs, (batch, seq_len, hidden_size).
        """
        batch_size = encoder_outputs.size(0)
        n_steps = encoder_outputs.size(1)

        # Tile the query along the time axis so it can be paired with every step.
        query = hidden.unsqueeze(1).repeat(1, n_steps, 1)
        paired = torch.cat((query, encoder_outputs), dim=2)
        projected = torch.tanh(self.attn(paired))

        # (batch, 1, hidden) @ (batch, hidden, seq_len) -> (batch, 1, seq_len)
        v_rows = self.v.repeat(batch_size, 1).unsqueeze(1)
        scores = torch.bmm(v_rows, projected.transpose(1, 2)).squeeze(1)
        return torch.softmax(scores, dim=1)

class LSTMEncoder(nn.Module):
    """Stacked, batch-first LSTM that encodes the input window."""

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every LSTM weight matrix and zero every bias."""
        for param_name, tensor in self.lstm.named_parameters():
            if 'weight' in param_name:
                nn.init.xavier_uniform_(tensor)
                continue
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)

    def forward(self, x):
        """Return ``(outputs, hidden, cell)`` produced by the LSTM for ``x``."""
        outputs, final_state = self.lstm(x)
        hidden, cell = final_state
        return outputs, hidden, cell

class LSTMDecoderWithAttention(nn.Module):
    """Single-step LSTM decoder driven by an attention context vector.

    The decoder attends over the encoder outputs using the top-layer hidden
    state, concatenates the resulting context with the current temporal
    features, runs one LSTM step and projects the result to ``output_size``.

    Args:
        hidden_size: Width of the encoder/decoder hidden state.
        output_size: Number of values predicted per sample.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
        temporal_size: Number of current temporal features appended to the
            context vector. Defaults to 4, the value that used to be
            hard-coded, so existing callers and checkpoints are unaffected.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, temporal_size=4):
        super(LSTMDecoderWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.temporal_size = temporal_size
        self.attention = Attention(hidden_size)
        # LSTM input = attention context (hidden_size) + temporal features.
        self.lstm = nn.LSTM(hidden_size + temporal_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise LSTM and output weights; zero all biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, hidden, cell, encoder_outputs, current_temporal_features):
        """Run one decoding step.

        Args:
            hidden: Encoder hidden state, (num_layers, batch, hidden_size).
            cell: Encoder cell state, same shape as ``hidden``.
            encoder_outputs: Encoder outputs, (batch, seq_len, hidden_size).
            current_temporal_features: (batch, 1, temporal_size) features for
                the step being predicted.

        Returns:
            ``(prediction, hidden, cell, attention_weights)`` where
            ``attention_weights`` has shape (batch, seq_len).

        Raises:
            ValueError: If the last dimension of ``current_temporal_features``
                does not equal ``temporal_size``.
        """
        if current_temporal_features.size(-1) != self.temporal_size:
            raise ValueError(
                f"expected {self.temporal_size} temporal features, "
                f"got {current_temporal_features.size(-1)}"
            )
        # Query the attention with the top layer's hidden state.
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        attention_weights = attention_weights.unsqueeze(1)
        # Weighted sum of encoder outputs: (batch, 1, hidden_size).
        context = torch.bmm(attention_weights, encoder_outputs)
        decoder_input = torch.cat([context, current_temporal_features], dim=2)
        output, (hidden, cell) = self.lstm(decoder_input, (hidden, cell))
        prediction = self.fc(output).squeeze(1)
        return prediction, hidden, cell, attention_weights.squeeze(1)

class LSTMEncoderDecoderWithAttention(nn.Module):
    """Encoder-decoder wrapper: encode the window, then decode one step with attention."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(input_size, hidden_size, num_layers, dropout)
        self.decoder = LSTMDecoderWithAttention(hidden_size, output_size, num_layers, dropout)

    def forward(self, encoder_input, current_temporal_features):
        """Return ``(prediction, attention_weights)`` for a batch of input windows."""
        encoder_outputs, hidden, cell = self.encoder(encoder_input)
        decoded = self.decoder(hidden, cell, encoder_outputs, current_temporal_features)
        # The decoder also returns its updated hidden/cell states; they are not needed here.
        prediction, attention_weights = decoded[0], decoded[3]
        return prediction, attention_weights

def load_and_preprocess_data(file_path, features, target, current_temporal_features):
    """Load the CSV, fill gaps, min-max scale feature/target columns and persist the scalers.

    Args:
        file_path: Path of the CSV file to read.
        features: Column names scaled with the feature scaler.
        target: Column names scaled with the target scaler.
        current_temporal_features: Unused here; kept for interface compatibility.

    Returns:
        ``(df, scaler_target)``: the scaled DataFrame and the fitted target
        scaler, which maps scaled predictions back to the original units.

    Side effects:
        Writes ``scaler_features.pkl`` and ``scaler_target.pkl`` to the
        current working directory.
    """
    df = pd.read_csv(file_path)

    # Forward-fill, then back-fill so leading gaps are covered as well.
    # (``fillna(method=...)`` is deprecated in recent pandas releases.)
    if df.isnull().values.any():
        df = df.ffill().bfill()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    # Fit the target scaler on the RAW target values *before* the feature
    # columns are overwritten. The target columns may also appear in
    # ``features``; fitting afterwards would see data already squeezed into
    # [0, 1], learn an identity mapping, and ``inverse_transform`` would never
    # return values on the original scale.
    scaled_target = scaler_target.fit_transform(df[target])

    # Normalize feature columns
    df[features] = scaler_features.fit_transform(df[features])

    # Normalize target columns
    df[target] = scaled_target

    # Save the scalers for later use (e.g., inverse transforming predictions)
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')

    return df, scaler_target

def prepare_data(df, features, target, current_temporal_features, n_timesteps, test_size=0.2, shuffle=False):
    """Build sliding-window samples and split them into train/test tensors.

    Args:
        df: Pre-processed DataFrame containing the feature and target columns.
        features: Column names that form each input window.
        target: Column names to predict.
        current_temporal_features: Column names read at the prediction step.
        n_timesteps: Length of each input window.
        test_size: Fraction of samples held out for testing.
        shuffle: When False (default) the split is chronological and the last
            ``test_size`` fraction of windows forms the test set. Consecutive
            windows overlap, so a shuffled split places near-duplicates of
            test windows (and observations from the future) in the training
            set and inflates test accuracy. Pass True for the former random
            split (seeded with 42).

    Returns:
        ``(X_train, X_test, y_train, y_test, current_temporal_train,
        current_temporal_test)`` as float32 tensors; the temporal tensors get
        an extra axis at position 1.
    """
    X, y, current_temporal = create_sequences(df[features], df[target], current_temporal_features, n_timesteps)
    splits = train_test_split(
        X, y, current_temporal,
        test_size=test_size,
        shuffle=shuffle,
        random_state=42 if shuffle else None,
    )

    # Convert to PyTorch tensors
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = (
        torch.tensor(part, dtype=torch.float32) for part in splits
    )
    # The decoder consumes the temporal features as a one-step sequence.
    current_temporal_train = current_temporal_train.unsqueeze(1)
    current_temporal_test = current_temporal_test.unsqueeze(1)

    return X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test

def create_sequences(data, target_data, current_temporal_features, n_timesteps):
    """Slice a feature frame into sliding windows with next-step targets.

    For every start position ``i`` the sample consists of rows
    ``i .. i + n_timesteps - 1`` of ``data``; the target and the current
    temporal features are both read from row ``i + n_timesteps``.

    Args:
        data: DataFrame of input features.
        target_data: DataFrame of target columns, row-aligned with ``data``.
        current_temporal_features: Column names of ``data`` to read at the
            prediction step.
        n_timesteps: Window length.

    Returns:
        ``(X, y, current_time_features)`` as NumPy arrays.
    """
    # Convert once up front. Selecting ``data[current_temporal_features]``
    # inside the loop copied the whole frame on every iteration.
    feature_values = data.to_numpy()
    target_values = target_data.to_numpy()
    temporal_values = data[current_temporal_features].to_numpy()

    X, y, current_time_features = [], [], []
    for start in range(len(data) - n_timesteps):
        step = start + n_timesteps
        X.append(feature_values[start:step])
        y.append(target_values[step])
        current_time_features.append(temporal_values[step])
    return np.array(X), np.array(y), np.array(current_time_features)

def train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler=None):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Each batch is a ``(X, y, current_temporal)`` triple. Gradients are clipped
    to a global norm of 1.0 before every optimizer step, the optional scheduler
    is stepped once per epoch, and the mean batch loss is printed every tenth
    epoch.
    """
    for epoch_idx in range(n_epochs):
        model.train()
        running_loss = 0
        for X_batch, y_batch, current_temporal_batch in train_loader:
            optimizer.zero_grad()
            # The model also returns attention weights, which training ignores.
            predictions, _ = model(X_batch, current_temporal_batch)
            batch_loss = criterion(predictions, y_batch)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += batch_loss.item()

        if scheduler:
            scheduler.step()

        epoch_number = epoch_idx + 1
        if epoch_number % 10 != 0:
            continue
        print(f'Epoch [{epoch_number}/{n_epochs}], Loss: {running_loss/len(train_loader):.4f}')

def evaluate_lstm(model, test_loader, scaler_target):
    """Run the model over every test batch and return forecasts and attention weights.

    Args:
        model: Callable module returning ``(forecast, attention_weights)`` for
            ``(X_batch, current_temporal_batch)``.
        test_loader: Iterable of ``(X, y, current_temporal)`` batches.
        scaler_target: Fitted scaler whose ``inverse_transform`` maps scaled
            predictions back to the original units.

    Returns:
        ``(forecast_original_scale, attention_weights_array)``: one row per
        test sample, in loader order.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    forecast_list = []
    attention_weights_list = []
    with torch.no_grad():
        for X_batch, _, current_temporal_batch in test_loader:
            forecast, attention_weights = model(X_batch, current_temporal_batch)
            # Collect EVERY batch. Previously only the final batch's forecast
            # survived the loop, so predictions did not line up with the full
            # test set or with the attention weights.
            forecast_list.append(forecast.cpu().numpy())
            attention_weights_list.append(attention_weights.cpu().numpy())

    if not forecast_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Convert lists to numpy arrays
    forecast_array = np.concatenate(forecast_list, axis=0)
    attention_weights_array = np.concatenate(attention_weights_list, axis=0)

    # Inverse-transform the predictions to the original scale
    forecast_original_scale = scaler_target.inverse_transform(forecast_array)
    return forecast_original_scale, attention_weights_array

def visualize_attention_weights(attention_weights_array, sample_index=0):
    """Show one sample's attention weights as a single-row annotated heatmap.

    Args:
        attention_weights_array: Array indexed by sample along the first axis.
        sample_index: Index of the sample to plot.
    """
    # Imported at function scope: this cell's import block does not bring in
    # the plotting libraries, so the function would otherwise fail with a
    # NameError unless an earlier cell had imported them.
    import matplotlib.pyplot as plt
    import seaborn as sns

    attention_weights_sample = attention_weights_array[sample_index]
    plt.figure(figsize=(10, 6))
    # Reshape to one row so the input positions run along the x-axis.
    sns.heatmap(attention_weights_sample.reshape(1, -1), cmap='viridis', annot=True)
    plt.title('Attention Weights for Sample Index {}'.format(sample_index))
    plt.xlabel('Input Sequence Index')
    plt.ylabel('Attention Weight')
    plt.show()

def main_with_training_and_saving(file_path, features, target, current_temporal_features, n_timesteps, hidden_dim, num_layers, dropout, learning_rate, n_epochs, batch_size, save_model_path):
    """Train the attention encoder-decoder from scratch, save its weights and evaluate it.

    The model's ``state_dict`` is written to ``save_model_path`` after
    training. Predictions and true values for the test split are then printed,
    and the attention weights of the first test sample are plotted.
    """
    # --- data -------------------------------------------------------------
    df, scaler_target = load_and_preprocess_data(file_path, features, target, current_temporal_features)
    tensors = prepare_data(df, features, target, current_temporal_features, n_timesteps)
    X_train, X_test, y_train, y_test, current_temporal_train, current_temporal_test = tensors

    # --- model (dimensions are read off the prepared tensors) --------------
    model = LSTMEncoderDecoderWithAttention(
        input_size=X_train.size(2),
        hidden_size=hidden_dim,
        output_size=y_train.size(1),
        num_layers=num_layers,
        dropout=dropout,
    )

    # --- training -----------------------------------------------------------
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_loader = DataLoader(
        TensorDataset(X_train, y_train, current_temporal_train),
        batch_size=batch_size,
        shuffle=True,
    )
    train_lstm(model, train_loader, n_epochs, optimizer, criterion, scheduler)

    # --- persist the trained weights ----------------------------------------
    torch.save(model.state_dict(), save_model_path)
    print(f"Model saved to {save_model_path}")

    # --- evaluation ---------------------------------------------------------
    test_loader = DataLoader(
        TensorDataset(X_test, y_test, current_temporal_test),
        batch_size=batch_size,
        shuffle=False,
    )
    forecast_original_scale, attention_weights_array = evaluate_lstm(model, test_loader, scaler_target)

    # Bring the true targets back through the same scaler for comparison.
    y_test_original_scale = scaler_target.inverse_transform(y_test.cpu().numpy())
    print("Predictions on original scale:", forecast_original_scale)
    print("True values on original scale:", y_test_original_scale)

    # Plot the attention weights of the default (first) sample.
    visualize_attention_weights(attention_weights_array)

# Data configuration and hyper-parameters for the train-and-save run
file_path = 'time_series_data.csv'
features = [
    'rate_level_1',
    'rate_level_2',
    'days_to_end_of_month',
    'days_to_ECB_meeting',
    'days_to_Fed_meeting',
    'ois_sofr_rate',
]
target = ['rate_level_1', 'rate_level_2']
current_temporal_features = [
    'days_to_end_of_month',
    'days_to_ECB_meeting',
    'days_to_Fed_meeting',
    'ois_sofr_rate',
]
n_timesteps = 12
hidden_dim, num_layers, dropout = 128, 2, 0.3
learning_rate, n_epochs, batch_size = 0.001, 50, 64
save_model_path = 'pretrained_model.pth'

# Train from scratch, save the weights and evaluate
main_with_training_and_saving(
    file_path=file_path,
    features=features,
    target=target,
    current_temporal_features=current_temporal_features,
    n_timesteps=n_timesteps,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    dropout=dropout,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    batch_size=batch_size,
    save_model_path=save_model_path,
)