In [None]:
import torch
import torch.nn as nn

class LSTMDecoderWithTeacherForcing(nn.Module):
    """Step-by-step LSTM decoder with optional teacher forcing.

    Args:
        input_size: Feature size of each LSTM input step.
        hidden_size: Hidden state size of the LSTM.
        output_size: Feature size produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.

    Note:
        Each prediction (or target step) is fed straight back in as the next
        LSTM input, so the feedback path requires ``output_size`` and the target
        feature size to equal ``input_size``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super(LSTMDecoderWithTeacherForcing, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

    def forward(self, encoder_outputs, hidden, cell, targets=None, teacher_forcing_ratio=0.5):
        """Decode one step at a time, starting from the first encoder output.

        Args:
            encoder_outputs: Tensor indexed as ``[batch, step, feature]``; step 0
                seeds the decoder and its first two sizes fix the output shape.
            hidden: Initial LSTM hidden state.
            cell: Initial LSTM cell state.
            targets: Optional ground-truth tensor indexed as ``[:, t, :]``. When
                ``None`` the decoder always feeds back its own prediction.
            teacher_forcing_ratio: Probability, per step, of feeding the target
                instead of the prediction (only used when ``targets`` is given).

        Returns:
            Tensor of shape ``(batch, seq_len, output_size)``. Position 0 stays
            zero because decoding starts at ``t = 1``.
        """
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        output_size = self.fc.out_features

        outputs = torch.zeros(batch_size, seq_len, output_size, device=encoder_outputs.device)
        step_input = encoder_outputs[:, 0, :].unsqueeze(1)  # First input to the decoder is the first encoder output

        for t in range(1, seq_len):
            output, (hidden, cell) = self.lstm(step_input, (hidden, cell))
            output = self.fc(output.squeeze(1))
            outputs[:, t, :] = output

            # Only consider teacher forcing when targets exist; otherwise indexing
            # `None` would crash inference whenever the random draw says "force".
            teacher_force = targets is not None and torch.rand(1).item() < teacher_forcing_ratio
            step_input = targets[:, t, :].unsqueeze(1) if teacher_force else output.unsqueeze(1)

        return outputs

# Example usage: build a small decoder and print its layer layout
input_size, hidden_size, output_size = 10, 20, 10
num_layers, dropout = 2, 0.5

decoder = LSTMDecoderWithTeacherForcing(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
)
print(decoder)

In [None]:
class LSTMDecoderWithAttention(nn.Module):
    """Attention-based LSTM decoder seeded with an explicit context vector.

    Args:
        hidden_size: Hidden state size of the LSTM and of the attention module.
        output_size: Feature size produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
        context_size: Feature size of ``current_context``; the LSTM input is the
            attention context concatenated with a ``context_size``-wide vector.

    Note:
        After the first step the previous prediction (or target step) takes the
        place of ``current_context``, so the feedback path requires
        ``output_size == context_size``.
        A class with the same name is defined again in a later cell of this
        notebook; whichever cell runs last is the one bound to the name.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, context_size):
        super(LSTMDecoderWithAttention, self).__init__()
        self.attention = Attention(hidden_size)
        self.lstm = nn.LSTM(hidden_size + context_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, hidden, cell, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Decode ``seq_len`` steps, attending over ``encoder_outputs`` at each step.

        Args:
            encoder_outputs: Tensor indexed as ``[batch, step, feature]``.
            hidden: Initial LSTM hidden state; its last layer drives the attention.
            cell: Initial LSTM cell state.
            current_context: First decoder input, one vector per batch element.
            targets: Optional ground-truth tensor indexed as ``[:, t, :]``. When
                ``None`` the decoder always feeds back its own prediction.
            teacher_forcing_ratio: Probability, per step, of feeding the target
                instead of the prediction (only used when ``targets`` is given).

        Returns:
            ``(outputs, attn_weights)`` where ``outputs`` has shape
            ``(batch, seq_len, output_size)`` and ``attn_weights`` are the
            attention weights of the final decoding step.
        """
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        output_size = self.fc.out_features

        outputs = torch.zeros(batch_size, seq_len, output_size, device=encoder_outputs.device)
        step_input = current_context.unsqueeze(1)  # First input to the decoder is the current context

        for t in range(seq_len):
            attn_weights = self.attention(hidden[-1], encoder_outputs)
            context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)
            lstm_input = torch.cat([context, step_input.squeeze(1)], dim=1).unsqueeze(1)  # Add sequence dimension
            output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
            output = self.fc(output.squeeze(1))
            outputs[:, t, :] = output

            # Guard against `targets=None` (the default): without it, inference
            # crashes whenever the random draw selects teacher forcing.
            teacher_force = targets is not None and torch.rand(1).item() < teacher_forcing_ratio
            step_input = targets[:, t, :].unsqueeze(1) if teacher_force else output.unsqueeze(1)

        return outputs, attn_weights

In [None]:
import torch
import torch.nn as nn

# Define Attention Mechanism
class Attention(nn.Module):
    """Additive attention that scores every encoder step against one query vector."""

    def __init__(self, hidden_size):
        super().__init__()
        # Projects the concatenated [query ; encoder step] pair back to hidden_size.
        self.attn = nn.Linear(hidden_size * 2, hidden_size)
        # Learned vector that reduces each projected pair to a scalar score.
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.init_weights()

    def init_weights(self):
        """Re-initialise the projection (Xavier, zero bias) and the scoring vector."""
        nn.init.xavier_uniform_(self.attn.weight)
        nn.init.zeros_(self.attn.bias)
        nn.init.uniform_(self.v, -0.1, 0.1)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised weights over the step axis of ``encoder_outputs``.

        ``hidden`` is broadcast along the step axis, concatenated with
        ``encoder_outputs`` on the feature axis, projected, squashed with tanh
        and scored against ``self.v``.
        """
        batch_size, seq_len = encoder_outputs.size(0), encoder_outputs.size(1)

        tiled_hidden = hidden.unsqueeze(1).expand(-1, seq_len, -1)
        paired = torch.cat((tiled_hidden, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired)).transpose(1, 2)

        scorer = self.v.expand(batch_size, -1).unsqueeze(1)
        scores = torch.bmm(scorer, energy).squeeze(1)
        return torch.softmax(scores, dim=1)

# Define LSTM Encoder
class LSTMEncoder(nn.Module):
    """Thin wrapper around a batch-first ``nn.LSTM`` that flattens its return value."""

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

    def forward(self, x):
        """Run the LSTM over ``x`` and return ``(outputs, hidden, cell)``."""
        outputs, final_state = self.lstm(x)
        hidden, cell = final_state
        return outputs, hidden, cell

# Define LSTM Decoder with Attention
class LSTMDecoderWithAttention(nn.Module):
    """Attention-based LSTM decoder seeded with the last encoder output.

    Args:
        hidden_size: Hidden state size of the LSTM, the attention module and the
            encoder outputs.
        output_size: Feature size produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout):
        super(LSTMDecoderWithAttention, self).__init__()
        self.attention = Attention(hidden_size)
        self.lstm = nn.LSTM(hidden_size * 2, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        # The LSTM consumes [context ; previous step], both hidden_size wide, but
        # the fed-back prediction/target is only output_size wide. Project it up
        # so decoding does not fail at the second step when the sizes differ.
        # When the sizes already match, keep the original pass-through behaviour.
        if output_size == hidden_size:
            self.feedback_proj = nn.Identity()
        else:
            self.feedback_proj = nn.Linear(output_size, hidden_size)

    def forward(self, encoder_outputs, hidden, cell, targets=None, teacher_forcing_ratio=0.5):
        """Decode ``seq_len`` steps, attending over ``encoder_outputs`` at each step.

        Args:
            encoder_outputs: Tensor indexed as ``[batch, step, feature]``.
            hidden: Initial LSTM hidden state; its last layer drives the attention.
            cell: Initial LSTM cell state.
            targets: Optional ground truth indexed as ``[:, t]``; either
                ``(batch, seq_len, output_size)`` or ``(batch, seq_len)``.
            teacher_forcing_ratio: Probability, per step, of feeding the target
                instead of the prediction (only used when ``targets`` is given).

        Returns:
            ``(outputs, attn_weights)`` where ``outputs`` has shape
            ``(batch, seq_len, output_size)`` and ``attn_weights`` are the
            attention weights of the final decoding step.
        """
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        output_size = self.fc.out_features

        outputs = torch.zeros(batch_size, seq_len, output_size, device=encoder_outputs.device)
        step_input = encoder_outputs[:, -1, :]  # First input to the decoder is the last encoder output

        for t in range(seq_len):
            attn_weights = self.attention(hidden[-1], encoder_outputs)
            context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)
            lstm_input = torch.cat([context, step_input], dim=1).unsqueeze(1)  # Add sequence dimension
            output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
            output = self.fc(output.squeeze(1))
            outputs[:, t, :] = output

            teacher_force = targets is not None and torch.rand(1).item() < teacher_forcing_ratio
            # reshape keeps a per-sample feature axis even for 2-D targets.
            feedback = targets[:, t].reshape(batch_size, -1) if teacher_force else output
            step_input = self.feedback_proj(feedback)

        return outputs, attn_weights

# Define the Hybrid Model
class HybridModel(nn.Module):
    """Sequence-to-sequence model: an LSTM encoder feeding an attention LSTM decoder."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout):
        super().__init__()
        self.encoder = LSTMEncoder(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
        )
        self.decoder = LSTMDecoderWithAttention(
            hidden_size=hidden_size,
            output_size=output_size,
            num_layers=num_layers,
            dropout=dropout,
        )

    def forward(self, past_data, targets=None, teacher_forcing_ratio=0.5):
        """Encode ``past_data`` and decode it, returning ``(prediction, attn_weights)``."""
        memory, enc_hidden, enc_cell = self.encoder(past_data)
        prediction, attn_weights = self.decoder(
            memory, enc_hidden, enc_cell, targets, teacher_forcing_ratio
        )
        return prediction, attn_weights

# Example usage
input_size = 10
hidden_size = 128
output_size = 1
num_layers = 2
dropout = 0.5
batch_size = 32
seq_len = 20

model = HybridModel(input_size, hidden_size, output_size, num_layers, dropout)
criterion = nn.MSELoss()
# `optim` is only imported in a later cell; go through `torch.optim` so this
# cell also runs on a fresh kernel (Restart & Run All).
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Dummy data
past_data = torch.randn(batch_size, seq_len, input_size)
targets = torch.randn(batch_size, seq_len, output_size)

# Forward pass
optimizer.zero_grad()  # start from clean gradients so re-running the cell does not accumulate them
outputs, attn_weights = model(past_data, targets, teacher_forcing_ratio=0.5)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()

print("Output shape:", outputs.shape)
print("Attention weights shape:", attn_weights.shape)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TimeSeriesTransformer(nn.Module):
    """Encoder-only transformer for per-step regression on continuous inputs.

    Args:
        config: Object exposing ``n_embd``, ``n_head``, ``n_layer`` and
            ``block_size`` attributes.
        input_size: Number of input features per time step.
        output_size: Number of predicted values per time step.
    """

    def __init__(self, config, input_size, output_size):
        super(TimeSeriesTransformer, self).__init__()
        self.config = config
        self.input_size = input_size
        self.output_size = output_size

        # Embedding layer for continuous inputs
        self.input_embedding = nn.Linear(input_size, config.n_embd)

        # Learned positional encodings, one vector per position up to block_size
        self.position_embeddings = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))

        # Encoder-only stack. `nn.Transformer` is a full encoder-decoder whose
        # forward() requires both `src` and `tgt`, so it cannot be called with a
        # single tensor. batch_first=True matches the (batch, seq, feature)
        # layout implied by the positional-embedding slice in forward().
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.n_embd, nhead=config.n_head, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=config.n_layer)

        # Output layer to map transformer output to target size
        self.output_layer = nn.Linear(config.n_embd, output_size)

    def forward(self, inputs):
        """Map ``(batch, seq_len, input_size)`` inputs to ``(batch, seq_len, output_size)``.

        Raises:
            ValueError: If ``seq_len`` exceeds ``config.block_size``, since no
                positional embedding exists beyond that length.
        """
        seq_len = inputs.size(1)
        if seq_len > self.config.block_size:
            raise ValueError(
                f"sequence length {seq_len} exceeds block_size {self.config.block_size}"
            )

        # Embed the inputs and add positional encodings
        x = self.input_embedding(inputs) + self.position_embeddings[:, :seq_len, :]

        # Transformer encoding
        x = self.transformer(x)

        # Output prediction
        out = self.output_layer(x)

        return out

# Configuration and model instantiation
class GPTConfig:
    """Hyperparameters read by ``TimeSeriesTransformer``."""

    # Width of the embedding / model dimension.
    n_embd: int = 128
    # Number of heads in multi-head attention.
    n_head: int = 8
    # Number of transformer layers.
    n_layer: int = 4
    # Maximum sequence length (e.g., number of days).
    block_size: int = 30

# Per day: 5 input features (e.g., target + macro data) and 1 forecast value
# (e.g., forecasting a single target).
config = GPTConfig()
input_size, output_size = 5, 1

model = TimeSeriesTransformer(config, input_size, output_size)


In [None]:
import torch.optim as optim

def train_lstm_with_teacher_forcing(model, train_loader, criterion, optimizer, num_epochs, teacher_forcing_ratio=0.5):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: Module called as ``model(inputs, targets, teacher_forcing_ratio=...)``.
            It may return the predictions directly or a tuple whose first
            element is the predictions.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``train_loader``.
        teacher_forcing_ratio: Forwarded to the model on every call.

    Prints the loss of the last batch after every epoch.
    """
    # Plain nn.Module has no `.device`; honour one if the model defines it,
    # otherwise infer the device from the first parameter.
    device = getattr(model, "device", None)
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    model.train()
    for epoch in range(num_epochs):
        loss = None  # stays None when the loader yields no batches
        for inputs, targets in train_loader:
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs, targets, teacher_forcing_ratio=teacher_forcing_ratio)
            if isinstance(outputs, tuple):
                # Models that also return extras (e.g. attention weights) put
                # the predictions first.
                outputs = outputs[0]

            # Compute loss
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        if loss is None:
            print(f'Epoch [{epoch+1}/{num_epochs}], no batches in train_loader')
        else:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Example usage
batch_size = 32
seq_len = 10
input_size = 10

# Pin the hyperparameters here instead of inheriting whatever an earlier cell
# left behind. The decoder feeds its predictions back in as inputs and the
# targets have `input_size` features, so output_size must equal input_size.
hidden_size = 20
output_size = input_size
num_layers = 2
dropout = 0.5


class _ZeroStateDecoder(nn.Module):
    """Adapter exposing the ``model(inputs, targets, teacher_forcing_ratio=...)`` call used by the training loop."""

    def __init__(self, decoder):
        super().__init__()
        self.decoder = decoder

    @property
    def device(self):
        # The training loop reads `model.device`, which nn.Module does not provide.
        return next(self.parameters()).device

    def forward(self, inputs, targets=None, teacher_forcing_ratio=0.5):
        # The decoder needs explicit initial LSTM state; start from zeros.
        state_shape = (self.decoder.num_layers, inputs.size(0), self.decoder.hidden_size)
        hidden = inputs.new_zeros(state_shape)
        cell = inputs.new_zeros(state_shape)
        return self.decoder(inputs, hidden, cell, targets, teacher_forcing_ratio)


# Create a random dataset
train_data = torch.randn(batch_size, seq_len, input_size)
train_targets = torch.randn(batch_size, seq_len, input_size)
train_dataset = torch.utils.data.TensorDataset(train_data, train_targets)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize model, criterion, and optimizer
model = _ZeroStateDecoder(
    LSTMDecoderWithTeacherForcing(input_size, hidden_size, output_size, num_layers, dropout)
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model with teacher forcing
train_lstm_with_teacher_forcing(model, train_loader, criterion, optimizer, num_epochs=10, teacher_forcing_ratio=0.5)