In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Fully connected autoencoder with mirrored encoder and decoder stacks.

    The encoder maps ``input_dim`` features through every width listed in
    ``hidden_dims`` down to ``encoding_dim`` and applies ReLU after each linear
    layer, including the bottleneck. The decoder walks the same widths in
    reverse order and finishes with a Sigmoid.

    Args:
        input_dim: Size of the last dimension of the input.
        encoding_dim: Size of the bottleneck representation.
        hidden_dims: Sequence of intermediate widths, given in encoder order.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dims):
        super().__init__()

        # Encoder: input_dim -> hidden_dims[0] -> ... -> encoding_dim.
        enc_modules = []
        width = input_dim
        for next_width in hidden_dims:
            enc_modules += [nn.Linear(width, next_width), nn.ReLU()]
            width = next_width
        enc_modules += [nn.Linear(width, encoding_dim), nn.ReLU()]
        self.encoder = nn.Sequential(*enc_modules)

        # Decoder: encoding_dim -> hidden_dims[-1] -> ... -> input_dim.
        dec_modules = []
        width = encoding_dim
        for next_width in reversed(hidden_dims):
            dec_modules += [nn.Linear(width, next_width), nn.ReLU()]
            width = next_width
        dec_modules += [nn.Linear(width, input_dim), nn.Sigmoid()]
        self.decoder = nn.Sequential(*dec_modules)

    def forward(self, x):
        """Return the pair ``(encoded, decoded)`` computed from ``x``."""
        code = self.encoder(x)
        return code, self.decoder(code)

# Training function for the autoencoder
def train_autoencoder(autoencoder, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10):
    """Fit an autoencoder on reconstruction loss with patience-based early stopping.

    Every epoch performs one optimisation pass over ``train_loader``, steps the
    optional ``scheduler`` once, then measures the mean per-batch loss on
    ``val_loader``. Training ends once the validation loss has not improved for
    ``patience`` consecutive epochs. Progress is reported with ``print``.

    Args:
        autoencoder: Module whose forward returns ``(encoded, decoded)``.
        train_loader: Sized iterable of ``(inputs, _)`` batches.
        val_loader: Sized iterable of ``(inputs, _)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer that updates the autoencoder parameters.
        criterion: Callable ``criterion(decoded, inputs)`` returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Number of non-improving epochs tolerated before stopping.
    """
    lowest_val = float('inf')
    stale_epochs = 0

    for epoch in range(n_epochs):
        # ---- optimisation pass ----
        autoencoder.train()
        total_loss = 0
        for train_batch in train_loader:
            optimizer.zero_grad()
            inputs, _ = train_batch
            _, reconstruction = autoencoder(inputs)
            batch_loss = criterion(reconstruction, inputs)
            batch_loss.backward()
            optimizer.step()
            total_loss += batch_loss.item()

        if scheduler:
            scheduler.step()

        # ---- validation pass (no gradients) ----
        autoencoder.eval()
        val_loss = 0
        with torch.no_grad():
            for val_batch in val_loader:
                inputs, _ = val_batch
                _, reconstruction = autoencoder(inputs)
                val_loss += criterion(reconstruction, inputs).item()
        val_loss /= len(val_loader)

        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # ---- early-stopping bookkeeping ----
        if val_loss < lowest_val:
            lowest_val = val_loss
            stale_epochs = 0
            continue

        stale_epochs += 1
        if stale_epochs >= patience:
            print("Early stopping triggered")
            # The second message is only reported when epochs were actually skipped.
            if epoch + 1 < n_epochs:
                print("Early stopping")
            break

# Example usage
# Autoencoder sizes: 75 input features compressed to a 10-dimensional code
# through hidden layers of width 64 and 32.
input_dim = 75
encoding_dim = 10
hidden_dims = [64, 32]

autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dims)
# Reconstruction objective and optimizer for this stage. NOTE: the names
# `criterion` and `optimizer` are rebound by later cells, so re-run this cell
# before training the autoencoder again.
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_autoencoder(autoencoder, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion)

In [None]:
# Define Convolutional Layers with Batch Normalization using nn.Sequential
class ConvLayers(nn.Module):
    """Two Conv1d -> BatchNorm1d -> ReLU -> Dropout stages along the time axis.

    Input is a batch-first sequence ``(batch_size, seq_len, input_size)`` and
    the output is ``(batch_size, seq_len, hidden_size)``.

    Both convolutions use ``padding='same'`` so ``seq_len`` is preserved for
    every kernel size. The earlier ``(kernel_size - 1) // 2`` padding gave the
    same result for odd kernels but shortened the sequence by one step per
    convolution for even kernels, breaking the documented output shape.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Number of output channels of both convolutions.
        kernel_size: Width of the convolution kernels (odd or even).
        dropout: Dropout probability applied after each ReLU.
    """

    def __init__(self, input_size, hidden_size, kernel_size, dropout):
        super(ConvLayers, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv1d(input_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(hidden_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        """Apply the stack to ``x`` of shape ``(batch_size, seq_len, input_size)``."""
        x = x.permute(0, 2, 1)  # Conv1d expects (batch_size, channels, seq_len)
        x = self.layers(x)
        x = x.permute(0, 2, 1)  # Back to (batch_size, seq_len, hidden_size)
        return x

# Training function for the convolutional layers
def train_conv_layers(conv_layers, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, feature_extractor=None):
    """Train the convolutional stage on features from a frozen upstream model.

    For every batch the upstream model produces ``encoded`` features, the
    convolutional stage maps them to ``outputs`` and the loss is
    ``criterion(outputs, encoded)``. The upstream forward pass runs under
    ``torch.no_grad()`` so that no gradients are computed for, or accumulated
    in, the upstream model (only ``conv_layers`` is optimised here).

    NOTE(review): ``criterion(outputs, encoded)`` needs both tensors to be
    shape-compatible. ``ConvLayers`` in this notebook changes the feature width
    to ``hidden_size``, so an element-wise loss only works when that width
    matches the encoding width -- confirm the intended training target.

    Args:
        conv_layers: Module being trained.
        train_loader: Sized iterable of ``(inputs, _)`` batches.
        val_loader: Sized iterable of ``(inputs, _)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the parameters of ``conv_layers``.
        criterion: Callable returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        feature_extractor: Module returning ``(encoded, decoded)``. Defaults to
            the notebook-level ``autoencoder`` for backward compatibility.
    """
    if feature_extractor is None:
        # Backward-compatible fallback to the notebook-level global.
        feature_extractor = autoencoder

    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        conv_layers.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            inputs, _ = batch
            # The upstream stage is frozen: keep it out of the autograd graph.
            with torch.no_grad():
                encoded, _ = feature_extractor(inputs)
            outputs = conv_layers(encoded)
            loss = criterion(outputs, encoded)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation
        conv_layers.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, _ = batch
                encoded, _ = feature_extractor(inputs)
                outputs = conv_layers(encoded)
                loss = criterion(outputs, encoded)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Example usage
# Convolutional stage settings: 64 output channels, kernel width 3, 50% dropout.
conv_hidden_size = 64
conv_kernel_size = 3
dropout = 0.5

# The stage consumes the autoencoder code, hence `encoding_dim` input features.
conv_layers = ConvLayers(encoding_dim, conv_hidden_size, conv_kernel_size, dropout)
# NOTE: this rebinds the `criterion` and `optimizer` names used by the previous cell.
criterion = nn.MSELoss()
optimizer = optim.Adam(conv_layers.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_conv_layers(conv_layers, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion)

In [None]:
# Define LSTM Encoder
class LSTMEncoder(nn.Module):
    """Batch-first multi-layer LSTM that also exposes its final states.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

    def forward(self, x):
        """Return ``(outputs, hidden, cell)`` produced by the LSTM for ``x``."""
        sequence_out, final_state = self.lstm(x)
        final_hidden, final_cell = final_state
        return sequence_out, final_hidden, final_cell

# Training function for the LSTM encoder
def train_lstm_encoder(lstm_encoder, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, feature_extractor=None, conv_stage=None):
    """Train the LSTM encoder on features from frozen upstream stages.

    Each batch is pushed through the feature extractor and the convolutional
    stage, then through ``lstm_encoder``; the loss is
    ``criterion(outputs, conv_outputs)``. Both upstream stages run under
    ``torch.no_grad()`` so that no gradients are computed for, or accumulated
    in, modules that this function does not optimise.

    NOTE(review): ``criterion(outputs, conv_outputs)`` needs both tensors to be
    shape-compatible; with an element-wise loss that means the LSTM hidden width
    must match the convolutional feature width -- confirm the intended target.
    NOTE(review): the upstream stages are used in whatever train/eval mode the
    caller left them in.

    Args:
        lstm_encoder: Module being trained; forward returns ``(outputs, hidden, cell)``.
        train_loader: Sized iterable of ``(inputs, _)`` batches.
        val_loader: Sized iterable of ``(inputs, _)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the parameters of ``lstm_encoder``.
        criterion: Callable returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        feature_extractor: Module returning ``(encoded, decoded)``. Defaults to
            the notebook-level ``autoencoder``.
        conv_stage: Module applied to ``encoded``. Defaults to the
            notebook-level ``conv_layers``.
    """
    # Backward-compatible fallbacks to the notebook-level globals.
    if feature_extractor is None:
        feature_extractor = autoencoder
    if conv_stage is None:
        conv_stage = conv_layers

    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        lstm_encoder.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            inputs, _ = batch
            # Upstream stages are frozen: keep them out of the autograd graph.
            with torch.no_grad():
                encoded, _ = feature_extractor(inputs)
                conv_outputs = conv_stage(encoded)
            outputs, hidden, cell = lstm_encoder(conv_outputs)
            loss = criterion(outputs, conv_outputs)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation
        lstm_encoder.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, _ = batch
                encoded, _ = feature_extractor(inputs)
                conv_outputs = conv_stage(encoded)
                outputs, hidden, cell = lstm_encoder(conv_outputs)
                loss = criterion(outputs, conv_outputs)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Example usage
# LSTM encoder settings: hidden width 128, two stacked layers, 50% dropout.
lstm_hidden_size = 128
lstm_num_layers = 2
dropout = 0.5

# The encoder consumes the convolutional stage output, hence `conv_hidden_size` inputs.
lstm_encoder = LSTMEncoder(conv_hidden_size, lstm_hidden_size, lstm_num_layers, dropout)
# NOTE: this rebinds the `criterion` and `optimizer` names used by earlier cells.
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_encoder.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_lstm_encoder(lstm_encoder, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion)

In [None]:
# Define Transformer Encoder
class TransformerEncoder(nn.Module):
    """Linear embedding + learned positional encoding + Transformer encoder.

    Inputs are batch-first: ``(batch_size, seq_len, input_size)`` in,
    ``(batch_size, seq_len, hidden_size)`` out. The encoder layer is built with
    ``batch_first=True`` to match; with the default ``batch_first=False`` the
    attention would treat dim 0 (the batch) as the sequence axis and mix
    information between unrelated samples.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Model width (``d_model``); the feed-forward width is four times this.
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        max_len: Longest supported sequence (rows of the positional table).

    Raises:
        ValueError: From ``forward`` when the sequence is longer than ``max_len``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, max_len=1000):
        super(TransformerEncoder, self).__init__()
        self.hidden_size = hidden_size  # Exposed so downstream heads can size themselves.
        self.max_len = max_len
        self.embedding = nn.Linear(input_size, hidden_size)
        # Learnable positional table, initialised to zeros.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, hidden_size))
        # Kept as an attribute so existing state_dict keys remain valid.
        self.encoder_layers = nn.TransformerEncoderLayer(hidden_size, num_heads, hidden_size * 4, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layers, num_layers)

    def forward(self, x):
        """Encode ``x`` of shape ``(batch_size, seq_len, input_size)``."""
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding size {self.max_len}"
            )
        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        x = self.transformer_encoder(x)
        return x

# Training function for the transformer encoder
def train_transformer_encoder(transformer_encoder, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, feature_extractor=None, conv_stage=None, lstm_stage=None):
    """Train the transformer encoder on features from frozen upstream stages.

    Each batch is pushed through the feature extractor, the convolutional stage
    and the LSTM stage, then through ``transformer_encoder``; the loss is
    ``criterion(outputs, lstm_outputs)``. All upstream stages run under
    ``torch.no_grad()`` so that no gradients are computed for, or accumulated
    in, modules that this function does not optimise.

    NOTE(review): ``criterion(outputs, lstm_outputs)`` needs both tensors to be
    shape-compatible; with an element-wise loss that means the transformer
    width must match the LSTM hidden width -- confirm the intended target.
    NOTE(review): the upstream stages are used in whatever train/eval mode the
    caller left them in.

    Args:
        transformer_encoder: Module being trained.
        train_loader: Sized iterable of ``(inputs, _)`` batches.
        val_loader: Sized iterable of ``(inputs, _)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the parameters of ``transformer_encoder``.
        criterion: Callable returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        feature_extractor: Module returning ``(encoded, decoded)``. Defaults to
            the notebook-level ``autoencoder``.
        conv_stage: Module applied to ``encoded``. Defaults to the
            notebook-level ``conv_layers``.
        lstm_stage: Module returning ``(outputs, hidden, cell)``. Defaults to
            the notebook-level ``lstm_encoder``.
    """
    # Backward-compatible fallbacks to the notebook-level globals.
    if feature_extractor is None:
        feature_extractor = autoencoder
    if conv_stage is None:
        conv_stage = conv_layers
    if lstm_stage is None:
        lstm_stage = lstm_encoder

    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        transformer_encoder.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            inputs, _ = batch
            # Upstream stages are frozen: keep them out of the autograd graph.
            with torch.no_grad():
                encoded, _ = feature_extractor(inputs)
                conv_outputs = conv_stage(encoded)
                lstm_outputs, hidden, cell = lstm_stage(conv_outputs)
            outputs = transformer_encoder(lstm_outputs)
            loss = criterion(outputs, lstm_outputs)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation
        transformer_encoder.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, _ = batch
                encoded, _ = feature_extractor(inputs)
                conv_outputs = conv_stage(encoded)
                lstm_outputs, hidden, cell = lstm_stage(conv_outputs)
                outputs = transformer_encoder(lstm_outputs)
                loss = criterion(outputs, lstm_outputs)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Example usage
# Transformer settings: model width 256, two layers, four attention heads, 50% dropout.
transformer_hidden_size = 256
transformer_num_layers = 2
transformer_num_heads = 4
dropout = 0.5

# The transformer consumes the LSTM encoder outputs, hence `lstm_hidden_size` inputs.
transformer_encoder = TransformerEncoder(lstm_hidden_size, transformer_hidden_size, transformer_num_layers, transformer_num_heads, dropout)
# NOTE: this rebinds the `criterion` and `optimizer` names used by earlier cells.
criterion = nn.MSELoss()
optimizer = optim.Adam(transformer_encoder.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_transformer_encoder(transformer_encoder, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion)

In [None]:
# Define Hybrid Model with Models in Series
class HybridForecastingModel(nn.Module):
    """Chain pre-built stages: autoencoder -> conv -> LSTM encoder -> LSTM decoder -> transformer -> linear head.

    The autoencoder is treated as frozen (its forward pass runs under
    ``torch.no_grad()``); every other stage stays in the autograd graph.

    Args:
        autoencoder: Module returning ``(encoded, decoded)``.
        conv_layers: Module applied to ``encoded``.
        lstm_encoder: Module returning ``(outputs, hidden, cell)``.
        transformer_encoder: Module applied to the decoder output. Its model
            width is read from a ``hidden_size`` attribute when present,
            otherwise from ``embedding.out_features``.
        lstm_decoder: Module called as
            ``lstm_decoder(encoder_outputs, hidden, cell, current_context, targets, teacher_forcing_ratio)``
            and returning ``(outputs, attn_weights)``.
        context_size: Accepted for interface compatibility; not used here.
        output_size: Number of features produced by the final linear head.
    """

    def __init__(self, autoencoder, conv_layers, lstm_encoder, transformer_encoder, lstm_decoder, context_size, output_size):
        super(HybridForecastingModel, self).__init__()
        self.autoencoder = autoencoder
        self.conv_layers = conv_layers
        self.lstm_encoder = lstm_encoder
        self.transformer_encoder = transformer_encoder
        self.lstm_decoder = lstm_decoder
        # The notebook's TransformerEncoder does not define `hidden_size`, so
        # fall back to the width of its embedding layer instead of raising.
        d_model = getattr(transformer_encoder, "hidden_size", None)
        if d_model is None:
            d_model = transformer_encoder.embedding.out_features
        self.fc = nn.Linear(d_model, output_size)

    def forward(self, past_data, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Return ``(output, attn_weights)`` for one batch."""
        with torch.no_grad():
            encoded, _ = self.autoencoder(past_data)
        conv_outputs = self.conv_layers(encoded)
        encoder_outputs, hidden, cell = self.lstm_encoder(conv_outputs)
        lstm_output, attn_weights = self.lstm_decoder(encoder_outputs, hidden, cell, current_context, targets, teacher_forcing_ratio)
        # A 2-D decoder output gets a length-1 sequence axis (the original
        # behaviour). A 3-D output is already a sequence and must be passed
        # as-is: adding another axis would give the transformer a 4-D tensor.
        single_step = lstm_output.dim() == 2
        if single_step:
            lstm_output = lstm_output.unsqueeze(1)
        transformer_outputs = self.transformer_encoder(lstm_output)
        if single_step:
            transformer_outputs = transformer_outputs.squeeze(1)
        output = self.fc(transformer_outputs)
        return output, attn_weights

# Training function for the hybrid model
def train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, teacher_forcing_ratio=0.5):
    """Train the hybrid model with gradient clipping and early stopping.

    Training batches are run with targets and ``teacher_forcing_ratio``.
    Validation batches are run with ``targets=None`` and an explicit ratio of
    ``0.0``: teacher forcing must never happen without targets, and passing the
    ratio explicitly keeps validation safe even for a decoder that samples the
    teacher-forcing decision before checking whether targets were supplied.

    Args:
        model: Module called as ``model(past_data, context_data, targets, ratio)``
            and returning ``(output, attn_weights)``.
        train_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        val_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the model parameters.
        criterion: Callable ``criterion(output, y_batch)`` returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        teacher_forcing_ratio: Ratio forwarded to the model during training.
    """
    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            past_data, y_batch, context_data = batch
            output, _ = model(past_data, context_data, y_batch, teacher_forcing_ratio)
            loss = criterion(output, y_batch)
            loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation: free-running decoding, no targets, no teacher forcing.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                past_data, y_batch, context_data = batch
                output, _ = model(past_data, context_data, None, 0.0)
                loss = criterion(output, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Example usage
# Width of the context vector and of the forecast produced per step.
context_size = 10
output_size = 10

# NOTE(review): `lstm_decoder` is not defined in any cell visible above this
# one, so this line raises NameError on a fresh Restart & Run All unless a
# decoder instance is created first -- confirm where it is meant to come from.
model = HybridForecastingModel(autoencoder, conv_layers, lstm_encoder, transformer_encoder, lstm_decoder, context_size, output_size)
# NOTE: this rebinds the `criterion` and `optimizer` names used by earlier cells.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_hybrid(model, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion, teacher_forcing_ratio=0.5)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Symmetric multilayer-perceptron autoencoder.

    Encoder widths run ``input_dim -> *hidden_dims -> encoding_dim`` with a ReLU
    after every linear layer. Decoder widths run the same path backwards, with
    ReLU between layers and a Sigmoid on the reconstruction.

    Args:
        input_dim: Size of the last dimension of the input.
        encoding_dim: Size of the bottleneck representation.
        hidden_dims: Sequence of intermediate widths, given in encoder order.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dims):
        super().__init__()

        # Encoder: one (Linear, ReLU) pair per consecutive pair of widths.
        enc_dims = [input_dim, *hidden_dims, encoding_dim]
        self.encoder = nn.Sequential(*[
            layer
            for d_in, d_out in zip(enc_dims, enc_dims[1:])
            for layer in (nn.Linear(d_in, d_out), nn.ReLU())
        ])

        # Decoder: same construction on the reversed widths, then the trailing
        # ReLU is swapped for the Sigmoid output activation.
        dec_dims = [encoding_dim, *reversed(hidden_dims), input_dim]
        dec_layers = [
            layer
            for d_in, d_out in zip(dec_dims, dec_dims[1:])
            for layer in (nn.Linear(d_in, d_out), nn.ReLU())
        ]
        dec_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Return the pair ``(encoded, decoded)`` computed from ``x``."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction

# Define Convolutional Layers with Batch Normalization using nn.Sequential
class ConvLayers(nn.Module):
    """Two Conv1d -> BatchNorm1d -> ReLU -> Dropout stages along the time axis.

    Input is a batch-first sequence ``(batch_size, seq_len, input_size)`` and
    the output is ``(batch_size, seq_len, hidden_size)``.

    Both convolutions use ``padding='same'`` so ``seq_len`` is preserved for
    every kernel size. The earlier ``(kernel_size - 1) // 2`` padding gave the
    same result for odd kernels but shortened the sequence by one step per
    convolution for even kernels, breaking the documented output shape.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Number of output channels of both convolutions.
        kernel_size: Width of the convolution kernels (odd or even).
        dropout: Dropout probability applied after each ReLU.
    """

    def __init__(self, input_size, hidden_size, kernel_size, dropout):
        super(ConvLayers, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv1d(input_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(hidden_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        """Apply the stack to ``x`` of shape ``(batch_size, seq_len, input_size)``."""
        x = x.permute(0, 2, 1)  # Conv1d expects (batch_size, channels, seq_len)
        x = self.layers(x)
        x = x.permute(0, 2, 1)  # Back to (batch_size, seq_len, hidden_size)
        return x

# Define LSTM Encoder
class LSTMEncoder(nn.Module):
    """Thin wrapper around a batch-first ``nn.LSTM`` returning a flat triple.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        lstm_options = dict(batch_first=True, dropout=dropout)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, **lstm_options)

    def forward(self, x):
        """Return ``(outputs, hidden, cell)`` produced by the LSTM for ``x``."""
        per_step, last_states = self.lstm(x)
        return (per_step, *last_states)

# Define LSTM Decoder with Attention and Teacher Forcing
class LSTMDecoderWithAttention(nn.Module):
    """Step-wise LSTM decoder with attention over the encoder outputs.

    At every step the attention module scores the encoder outputs against the
    top-layer hidden state, the weighted sum is concatenated with the current
    decoder input, and the LSTM advances by one step. The next input is either
    the ground-truth target (teacher forcing) or the decoder's own prediction.

    Teacher forcing is only considered when ``targets`` is supplied; without
    this guard a call with ``targets=None`` (as done during validation) raised
    ``TypeError`` whenever the random draw selected teacher forcing.

    NOTE(review): ``Attention`` is not defined in the visible part of this
    notebook; it is called as ``attention(hidden[-1], encoder_outputs)`` and
    its result is used as per-position weights -- confirm its definition.
    NOTE(review): the LSTM input width is ``hidden_size + context_size`` while
    fed-back inputs have ``output_size`` features, so multi-step decoding
    presumably requires ``output_size == context_size``.

    Args:
        hidden_size: Width of the encoder outputs and of the decoder LSTM state.
        output_size: Number of features predicted per step.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
        context_size: Number of features in ``current_context``.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, context_size):
        super(LSTMDecoderWithAttention, self).__init__()
        self.attention = Attention(hidden_size)
        self.lstm = nn.LSTM(hidden_size + context_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, hidden, cell, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Decode one prediction per encoder step.

        Returns:
            ``(outputs, attn_weights)`` where ``outputs`` has shape
            ``(batch_size, seq_len, output_size)`` and ``attn_weights`` are the
            weights of the last step (``None`` when ``seq_len`` is zero).
        """
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)
        output_size = self.fc.out_features

        outputs = torch.zeros(batch_size, seq_len, output_size).to(encoder_outputs.device)
        step_input = current_context.unsqueeze(1)  # First input to the decoder is the current context
        attn_weights = None

        for t in range(seq_len):
            attn_weights = self.attention(hidden[-1], encoder_outputs)
            context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)
            lstm_input = torch.cat([context, step_input.squeeze(1)], dim=1).unsqueeze(1)  # Add sequence dimension
            output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
            output = self.fc(output.squeeze(1))
            outputs[:, t, :] = output

            # Only draw the teacher-forcing decision when targets exist.
            teacher_force = targets is not None and torch.rand(1).item() < teacher_forcing_ratio
            step_input = targets[:, t, :].unsqueeze(1) if teacher_force else output.unsqueeze(1)

        return outputs, attn_weights

# Define Transformer Encoder
class TransformerEncoder(nn.Module):
    """Linear embedding + learned positional encoding + Transformer encoder.

    Inputs are batch-first: ``(batch_size, seq_len, input_size)`` in,
    ``(batch_size, seq_len, hidden_size)`` out. The encoder layer is built with
    ``batch_first=True`` to match; with the default ``batch_first=False`` the
    attention would treat dim 0 (the batch) as the sequence axis and mix
    information between unrelated samples.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Model width (``d_model``); the feed-forward width is four times this.
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability inside the encoder layers.
        max_len: Longest supported sequence (rows of the positional table).

    Raises:
        ValueError: From ``forward`` when the sequence is longer than ``max_len``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, max_len=1000):
        super(TransformerEncoder, self).__init__()
        self.hidden_size = hidden_size  # Exposed so downstream heads can size themselves.
        self.max_len = max_len
        self.embedding = nn.Linear(input_size, hidden_size)
        # Learnable positional table, initialised to zeros.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, hidden_size))
        # Kept as an attribute so existing state_dict keys remain valid.
        self.encoder_layers = nn.TransformerEncoderLayer(hidden_size, num_heads, hidden_size * 4, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layers, num_layers)

    def forward(self, x):
        """Encode ``x`` of shape ``(batch_size, seq_len, input_size)``."""
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding size {self.max_len}"
            )
        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        x = self.transformer_encoder(x)
        return x

# Define Hybrid Model with Models in Series
class HybridForecastingModel(nn.Module):
    """End-to-end model: autoencoder -> conv -> LSTM encoder -> attention decoder -> transformer -> linear head.

    All stages are created here from the given sizes and are trained jointly.

    Args:
        input_dim: Number of input features per time step.
        encoding_dim: Autoencoder bottleneck width.
        hidden_dims: Autoencoder hidden widths, in encoder order.
        conv_hidden_size: Channels produced by the convolutional stage.
        lstm_hidden_size: Hidden width of the LSTM encoder and decoder.
        transformer_hidden_size: Model width of the transformer.
        context_size: Number of features in ``current_context``.
        output_size: Number of features predicted per step.
        lstm_num_layers: Layers in both LSTMs.
        transformer_num_layers: Layers in the transformer encoder.
        transformer_num_heads: Attention heads in the transformer.
        conv_kernel_size: Kernel width of the convolutions.
        dropout: Dropout probability shared by all stages.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dims, conv_hidden_size, lstm_hidden_size, transformer_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout):
        super(HybridForecastingModel, self).__init__()
        self.autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dims)
        self.conv_layers = ConvLayers(encoding_dim, conv_hidden_size, conv_kernel_size, dropout)
        self.lstm_encoder = LSTMEncoder(conv_hidden_size, lstm_hidden_size, lstm_num_layers, dropout)
        self.lstm_decoder = LSTMDecoderWithAttention(lstm_hidden_size, output_size, lstm_num_layers, dropout, context_size)
        self.transformer_encoder = TransformerEncoder(output_size, transformer_hidden_size, transformer_num_layers, transformer_num_heads, dropout)
        self.fc = nn.Linear(transformer_hidden_size, output_size)

    def forward(self, past_data, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Return ``(output, attn_weights)`` for one batch."""
        encoded, _ = self.autoencoder(past_data)
        conv_outputs = self.conv_layers(encoded)
        encoder_outputs, hidden, cell = self.lstm_encoder(conv_outputs)
        lstm_output, attn_weights = self.lstm_decoder(encoder_outputs, hidden, cell, current_context, targets, teacher_forcing_ratio)
        # The decoder returns a full sequence (3-D), which the transformer
        # consumes directly. Only a 2-D decoder output needs a length-1
        # sequence axis; unconditionally unsqueezing produced a 4-D tensor
        # that the transformer rejects.
        single_step = lstm_output.dim() == 2
        if single_step:
            lstm_output = lstm_output.unsqueeze(1)
        transformer_outputs = self.transformer_encoder(lstm_output)
        if single_step:
            transformer_outputs = transformer_outputs.squeeze(1)
        output = self.fc(transformer_outputs)
        return output, attn_weights

# Training function for the hybrid model
def train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, teacher_forcing_ratio=0.5):
    """Train the hybrid model with gradient clipping and early stopping.

    Training batches are run with targets and ``teacher_forcing_ratio``.
    Validation batches are run with ``targets=None`` and an explicit ratio of
    ``0.0``: teacher forcing must never happen without targets, and passing the
    ratio explicitly keeps validation safe even for a decoder that samples the
    teacher-forcing decision before checking whether targets were supplied.

    Args:
        model: Module called as ``model(past_data, context_data, targets, ratio)``
            and returning ``(output, attn_weights)``.
        train_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        val_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the model parameters.
        criterion: Callable ``criterion(output, y_batch)`` returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        teacher_forcing_ratio: Ratio forwarded to the model during training.
    """
    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            past_data, y_batch, context_data = batch
            output, _ = model(past_data, context_data, y_batch, teacher_forcing_ratio)
            loss = criterion(output, y_batch)
            loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation: free-running decoding, no targets, no teacher forcing.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                past_data, y_batch, context_data = batch
                output, _ = model(past_data, context_data, None, 0.0)
                loss = criterion(output, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Example usage
# Hyper-parameters for the end-to-end hybrid model.
# Autoencoder: 75 input features -> hidden widths 64, 32 -> 10-dimensional code.
input_dim = 75
encoding_dim = 10
hidden_dims = [64, 32]
# Decoder context width and per-step forecast width. The decoder feeds its own
# predictions (or targets) back in place of the context, so the two sizes are
# kept equal here.
context_size = 10
conv_hidden_size = 64
lstm_hidden_size = 128
transformer_hidden_size = 256
output_size = 10
lstm_num_layers = 2
transformer_num_layers = 2
transformer_num_heads = 4
conv_kernel_size = 3
dropout = 0.5

model = HybridForecastingModel(input_dim, encoding_dim, hidden_dims, conv_hidden_size, lstm_hidden_size, transformer_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_hybrid(model, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion, teacher_forcing_ratio=0.5)

In [None]:
class LSTMDecoderWithAttention(nn.Module):
    """Autoregressive LSTM decoder that attends over the encoder outputs.

    One prediction is produced per encoder time step. Each step concatenates
    the attention-weighted encoder summary with the current decoder input,
    advances the LSTM by one step and projects the result with ``fc``. The next
    input is the matching target when teacher forcing is selected (only
    possible when ``targets`` is given) and the prediction itself otherwise.

    NOTE(review): ``Attention`` is not defined in the visible part of this
    notebook; it is called as ``attention(hidden[-1], encoder_outputs)`` and
    its result is used as per-position weights -- confirm its definition.

    Args:
        hidden_size: Width of the encoder outputs and of the decoder LSTM state.
        output_size: Number of features predicted per step.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
        context_size: Number of features in ``current_context``.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, context_size):
        super().__init__()
        self.attention = Attention(hidden_size)
        self.lstm = nn.LSTM(hidden_size + context_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, hidden, cell, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Return ``(outputs, attn_weights)``; the weights are those of the last step."""
        n_batch, n_steps = encoder_outputs.size(0), encoder_outputs.size(1)
        predictions = torch.zeros(n_batch, n_steps, self.fc.out_features).to(encoder_outputs.device)

        # The first decoder input is the current context; later inputs are fed back.
        step_input = current_context

        for step in range(n_steps):
            attn_weights = self.attention(hidden[-1], encoder_outputs)
            attended = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)
            # Single-step sequence: (batch, 1, hidden_size + input width).
            rnn_in = torch.cat([attended, step_input], dim=1).unsqueeze(1)
            rnn_out, (hidden, cell) = self.lstm(rnn_in, (hidden, cell))
            step_pred = self.fc(rnn_out.squeeze(1))
            predictions[:, step, :] = step_pred

            # The random draw is skipped entirely when no targets are available.
            use_target = targets is not None and torch.rand(1).item() < teacher_forcing_ratio
            if use_target:
                step_input = targets[:, step, :]
            else:
                step_input = step_pred

        return predictions, attn_weights

In [None]:
# Training function for the hybrid model
def train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, teacher_forcing_ratio=0.5):
    """Train the hybrid model with gradient clipping and early stopping.

    Training batches are run with targets and ``teacher_forcing_ratio``.
    Validation batches are run with ``targets=None`` and an explicit ratio of
    ``0.0``: teacher forcing must never happen without targets, and passing the
    ratio explicitly keeps validation safe even for a decoder that samples the
    teacher-forcing decision before checking whether targets were supplied.

    Args:
        model: Module called as ``model(past_data, context_data, targets, ratio)``
            and returning ``(output, attn_weights)``.
        train_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        val_loader: Sized iterable of ``(past_data, y_batch, context_data)`` batches.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer over the model parameters.
        criterion: Callable ``criterion(output, y_batch)`` returning a scalar loss.
        scheduler: Optional object with a no-argument ``step()``.
        patience: Non-improving validation epochs tolerated before stopping.
        teacher_forcing_ratio: Ratio forwarded to the model during training.
    """
    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(n_epochs):
        if early_stop:
            print("Early stopping")
            break

        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            past_data, y_batch, context_data = batch
            output, _ = model(past_data, context_data, y_batch, teacher_forcing_ratio)  # Provide targets during training
            loss = criterion(output, y_batch)
            loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()

        if scheduler:
            scheduler.step()

        # Validation: free-running decoding, no targets, no teacher forcing.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                past_data, y_batch, context_data = batch
                output, _ = model(past_data, context_data, None, 0.0)
                loss = criterion(output, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping: the flag is acted on at the top of the next epoch.
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered")
                early_stop = True

# Evaluation function for the hybrid model
def evaluate_hybrid(model, test_loader):
    """Run the model over ``test_loader`` and return all predictions as one array.

    The model is switched to eval mode and called without targets and with a
    teacher-forcing ratio of ``0.0`` under ``torch.no_grad()``. Per-batch
    outputs are moved to the CPU and concatenated along axis 0.

    Args:
        model: Module called as ``model(past_data, context_data, targets, ratio)``
            and returning ``(output, attn_weights)``.
        test_loader: Iterable of ``(past_data, _, context_data)`` batches.

    Returns:
        ``numpy.ndarray`` holding the outputs of every batch stacked along axis 0.

    Raises:
        ValueError: Raised by ``numpy.concatenate`` when ``test_loader`` yields
            no batches.
    """
    # Imported locally because no cell above this one imports numpy; relying on
    # a later cell would make the function fail on a fresh kernel.
    import numpy as np

    model.eval()
    hybrid_test_preds = []
    with torch.no_grad():
        for batch in test_loader:
            past_data, _, context_data = batch
            # No targets at evaluation time, hence no teacher forcing.
            hybrid_output, _ = model(past_data, context_data, None, 0.0)
            hybrid_test_preds.append(hybrid_output.cpu().numpy())
    return np.concatenate(hybrid_test_preds, axis=0)

# Example usage
# Hyper-parameters for the end-to-end hybrid model.
# Autoencoder: 75 input features -> hidden widths 64, 32 -> 10-dimensional code.
input_dim = 75
encoding_dim = 10
hidden_dims = [64, 32]
# Decoder context width and per-step forecast width. The decoder feeds its own
# predictions (or targets) back in place of the context, so the two sizes are
# kept equal here.
context_size = 10
conv_hidden_size = 64
lstm_hidden_size = 128
transformer_hidden_size = 256
output_size = 10
lstm_num_layers = 2
transformer_num_layers = 2
transformer_num_heads = 4
conv_kernel_size = 3
dropout = 0.5

model = HybridForecastingModel(input_dim, encoding_dim, hidden_dims, conv_hidden_size, lstm_hidden_size, transformer_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Assuming train_loader and val_loader are defined.
# Arguments after n_epochs are passed by keyword because a positional argument
# may not follow a keyword argument in a Python call.
# train_hybrid(model, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion, teacher_forcing_ratio=0.5)

# Assuming test_loader is defined.
# hybrid_test_preds = evaluate_hybrid(model, test_loader)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define Autoencoder class
class Autoencoder(nn.Module):
    """Dense autoencoder whose decoder mirrors the encoder widths.

    The encoder applies ``Linear`` + ReLU for every step of
    ``input_dim -> *hidden_dims -> encoding_dim``. The decoder applies the same
    steps in the opposite direction and replaces the final ReLU with a Sigmoid.

    Args:
        input_dim: Size of the last dimension of the input.
        encoding_dim: Size of the bottleneck representation.
        hidden_dims: Sequence of intermediate widths, given in encoder order.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dims):
        super().__init__()

        def dense_stack(sizes, final_activation):
            # One Linear per consecutive size pair, ReLU in between and
            # `final_activation` after the last Linear.
            modules = []
            for index in range(len(sizes) - 1):
                modules.append(nn.Linear(sizes[index], sizes[index + 1]))
                modules.append(nn.ReLU())
            modules[-1] = final_activation
            return nn.Sequential(*modules)

        self.encoder = dense_stack([input_dim, *hidden_dims, encoding_dim], nn.ReLU())
        self.decoder = dense_stack([encoding_dim, *reversed(hidden_dims), input_dim], nn.Sigmoid())

    def forward(self, x):
        """Return the pair ``(encoded, decoded)`` computed from ``x``."""
        bottleneck = self.encoder(x)
        return bottleneck, self.decoder(bottleneck)

# Define Convolutional Layers with Batch Normalization using nn.Sequential
class ConvLayers(nn.Module):
    """Two Conv1d -> BatchNorm1d -> ReLU -> Dropout stages along the time axis.

    Input is a batch-first sequence ``(batch_size, seq_len, input_size)`` and
    the output is ``(batch_size, seq_len, hidden_size)``.

    Both convolutions use ``padding='same'`` so ``seq_len`` is preserved for
    every kernel size. The earlier ``(kernel_size - 1) // 2`` padding gave the
    same result for odd kernels but shortened the sequence by one step per
    convolution for even kernels, breaking the documented output shape.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Number of output channels of both convolutions.
        kernel_size: Width of the convolution kernels (odd or even).
        dropout: Dropout probability applied after each ReLU.
    """

    def __init__(self, input_size, hidden_size, kernel_size, dropout):
        super(ConvLayers, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv1d(input_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(hidden_size, hidden_size, kernel_size, padding='same'),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        """Apply the stack to ``x`` of shape ``(batch_size, seq_len, input_size)``."""
        x = x.permute(0, 2, 1)  # Conv1d expects (batch_size, channels, seq_len)
        x = self.layers(x)
        x = x.permute(0, 2, 1)  # Back to (batch_size, seq_len, hidden_size)
        return x

# Define LSTM Encoder
class LSTMEncoder(nn.Module):
    """Sequence encoder built on a batch-first, optionally stacked ``nn.LSTM``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            dropout=dropout,
            batch_first=True,
        )

    def forward(self, x):
        """Return ``(outputs, hidden, cell)`` produced by the LSTM for ``x``."""
        lstm_result = self.lstm(x)
        step_outputs = lstm_result[0]
        last_hidden, last_cell = lstm_result[1]
        return step_outputs, last_hidden, last_cell

# Define LSTM Decoder with Attention and Teacher Forcing
class LSTMDecoderWithAttention(nn.Module):
    """LSTM decoder with attention and optional teacher forcing.

    For each encoder time step the decoder (1) computes attention weights from
    the top-layer hidden state, (2) builds the weighted encoder summary,
    (3) runs one LSTM step on ``[summary, current input]`` and (4) projects the
    LSTM output to ``output_size``. Teacher forcing replaces the fed-back
    prediction with the target of the same step and is only drawn when
    ``targets`` is provided.

    NOTE(review): ``Attention`` is not defined in the visible part of this
    notebook; it is called as ``attention(hidden[-1], encoder_outputs)`` and
    its result is used as per-position weights -- confirm its definition.

    Args:
        hidden_size: Width of the encoder outputs and of the decoder LSTM state.
        output_size: Number of features predicted per step.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``.
        context_size: Number of features in ``current_context``.
    """

    def __init__(self, hidden_size, output_size, num_layers, dropout, context_size):
        super().__init__()
        self.attention = Attention(hidden_size)
        self.lstm = nn.LSTM(hidden_size + context_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, hidden, cell, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Return ``(outputs, attn_weights)``; the weights are those of the last step."""
        total_steps = encoder_outputs.size(1)
        buffer_shape = (encoder_outputs.size(0), total_steps, self.fc.out_features)
        decoded = torch.zeros(*buffer_shape).to(encoder_outputs.device)

        state = (hidden, cell)
        fed_back = current_context  # The first decoder input is the current context.

        for position in range(total_steps):
            top_hidden = state[0][-1]
            attn_weights = self.attention(top_hidden, encoder_outputs)
            summary = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)
            joined = torch.cat([summary, fed_back], dim=1)
            lstm_out, state = self.lstm(joined.unsqueeze(1), state)  # unsqueeze adds the sequence axis
            prediction = self.fc(lstm_out.squeeze(1))
            decoded[:, position, :] = prediction

            # Short-circuit keeps the random draw from happening without targets.
            if targets is not None and torch.rand(1).item() < teacher_forcing_ratio:
                fed_back = targets[:, position, :]
            else:
                fed_back = prediction

        return decoded, attn_weights

# Define Transformer Encoder
class TransformerEncoder(nn.Module):
    """Linear embedding + learned positional encoding + transformer encoder stack.

    Inputs are batch-first: (batch, seq_len, input_size).

    Args:
        input_size: Number of features per time step.
        hidden_size: Transformer model width (``d_model``).
        num_layers: Number of stacked encoder layers.
        num_heads: Attention heads per layer.
        dropout: Dropout probability inside each encoder layer.
        max_len: Longest sequence the positional table can cover
            (defaults to the previous hard-coded value of 1000).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, max_len=1000):
        super().__init__()
        self.embedding = nn.Linear(input_size, hidden_size)
        # Learned positional table, initialised to zero.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, hidden_size))
        # batch_first=True is required: forward() slices the positional table
        # along dim 1, i.e. it treats dim 1 as the sequence axis. With the
        # default (batch_first=False) the layers would interpret dim 0 as the
        # sequence and attend *across samples in the batch*.
        self.encoder_layers = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            nhead=num_heads,
            dim_feedforward=hidden_size * 4,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layers, num_layers)

    def forward(self, x):
        """Encode ``x`` of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, seq_len, hidden_size).

        Raises:
            ValueError: If ``seq_len`` exceeds the positional table length.
        """
        seq_len = x.size(1)
        max_len = self.positional_encoding.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"Sequence length {seq_len} exceeds the maximum supported length {max_len}"
            )
        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        return self.transformer_encoder(x)

# Define Hybrid Model with Models in Series
class HybridForecastingModel(nn.Module):
    """Forecasting pipeline that chains its component models in series.

    Stages, in order: autoencoder (only its latent code is used) ->
    convolutional layers -> LSTM encoder -> attention LSTM decoder ->
    transformer encoder -> linear output head.

    ``Autoencoder``, ``ConvLayers``, ``LSTMEncoder``,
    ``LSTMDecoderWithAttention`` and ``TransformerEncoder`` are defined
    elsewhere in this notebook.

    Args:
        input_dim: Feature size of the raw input fed to the autoencoder.
        encoding_dim: Size of the autoencoder latent code.
        hidden_dims: Hidden layer sizes of the autoencoder.
        conv_hidden_size: Channel/feature size produced by the conv stage.
        lstm_hidden_size: Hidden size shared by the LSTM encoder and decoder.
        transformer_hidden_size: Model width of the transformer stage.
        context_size: Feature size of ``current_context``.
        output_size: Feature size of each predicted step.
        lstm_num_layers: Number of layers in each LSTM.
        transformer_num_layers: Number of transformer encoder layers.
        transformer_num_heads: Attention heads per transformer layer.
        conv_kernel_size: Kernel size of the conv stage.
        dropout: Dropout probability shared by all stages that accept one.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dims, conv_hidden_size, lstm_hidden_size, transformer_hidden_size, context_size, output_size, lstm_num_layers, transformer_num_layers, transformer_num_heads, conv_kernel_size, dropout):
        super().__init__()
        self.autoencoder = Autoencoder(input_dim, encoding_dim, hidden_dims)
        self.conv_layers = ConvLayers(encoding_dim, conv_hidden_size, conv_kernel_size, dropout)
        self.lstm_encoder = LSTMEncoder(conv_hidden_size, lstm_hidden_size, lstm_num_layers, dropout)
        self.lstm_decoder = LSTMDecoderWithAttention(lstm_hidden_size, output_size, lstm_num_layers, dropout, context_size)
        self.transformer_encoder = TransformerEncoder(output_size, transformer_hidden_size, transformer_num_layers, transformer_num_heads, dropout)
        self.fc = nn.Linear(transformer_hidden_size, output_size)

    def forward(self, past_data, current_context, targets=None, teacher_forcing_ratio=0.5):
        """Run the full pipeline.

        Args:
            past_data: Historical input passed to the autoencoder.
            current_context: Initial decoder input.
            targets: Optional ground truth used for teacher forcing.
            teacher_forcing_ratio: Probability of teacher forcing per step.

        Returns:
            ``(output, attn_weights)``: the refined per-step predictions and
            the attention weights returned by the LSTM decoder.
        """
        # Only the latent code is needed; the reconstruction is discarded.
        encoded, _ = self.autoencoder(past_data)
        conv_outputs = self.conv_layers(encoded)
        encoder_outputs, hidden, cell = self.lstm_encoder(conv_outputs)
        lstm_output, attn_weights = self.lstm_decoder(encoder_outputs, hidden, cell, current_context, targets, teacher_forcing_ratio)
        # The decoder already returns a (batch, seq, output_size) sequence.
        # Adding another axis here would hand the transformer a 4-D tensor,
        # which nn.MultiheadAttention rejects, so it is passed through as-is.
        transformer_outputs = self.transformer_encoder(lstm_output)
        output = self.fc(transformer_outputs)
        return output, attn_weights

# Training function for the hybrid model
def train_hybrid(model, train_loader, val_loader, n_epochs, optimizer, criterion, scheduler=None, patience=10, teacher_forcing_ratio=0.5):
    """Train the hybrid model with gradient clipping and early stopping.

    Each epoch runs one optimisation pass over ``train_loader`` (targets are
    supplied so the decoder can teacher-force), steps ``scheduler`` if one is
    given, then measures the mean loss over ``val_loader`` without targets.
    Training halts once the validation loss has failed to improve for
    ``patience`` consecutive epochs.

    Args:
        model: Callable as ``model(past, context, targets, ratio)`` returning
            ``(output, extra)``.
        train_loader: Iterable of ``(past_data, y_batch, context_data)``.
        val_loader: Iterable with the same batch layout as ``train_loader``.
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss function ``criterion(output, y_batch)``.
        scheduler: Optional LR scheduler stepped once per epoch.
        patience: Non-improving epochs tolerated before stopping.
        teacher_forcing_ratio: Forwarded to the model during training.
    """
    lowest_val_loss = float('inf')
    stale_epochs = 0

    for epoch_idx in range(n_epochs):
        # ---- optimisation pass ----
        model.train()
        train_loss_sum = 0
        for past_data, y_batch, context_data in train_loader:
            optimizer.zero_grad()
            # Targets are supplied so the decoder may teacher-force.
            prediction, _ = model(past_data, context_data, y_batch, teacher_forcing_ratio)
            batch_loss = criterion(prediction, y_batch)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_loss_sum += batch_loss.item()

        if scheduler:
            scheduler.step()

        # ---- validation pass (no targets, no gradients) ----
        model.eval()
        val_loss_sum = 0
        with torch.no_grad():
            for past_data, y_batch, context_data in val_loader:
                prediction, _ = model(past_data, context_data)
                val_loss_sum += criterion(prediction, y_batch).item()

        mean_val_loss = val_loss_sum / len(val_loader)
        print(f'Epoch [{epoch_idx+1}/{n_epochs}], Loss: {train_loss_sum/len(train_loader):.4f}, Val Loss: {mean_val_loss:.4f}')

        # ---- early-stopping bookkeeping ----
        if mean_val_loss < lowest_val_loss:
            lowest_val_loss = mean_val_loss
            stale_epochs = 0
            continue

        stale_epochs += 1
        if stale_epochs >= patience:
            print("Early stopping triggered")
            # The second message is only emitted when at least one scheduled
            # epoch is being skipped.
            if epoch_idx + 1 < n_epochs:
                print("Early stopping")
            break

# Evaluation function for the hybrid model
def evaluate_hybrid(model, test_loader):
    """Run the model over ``test_loader`` and collect its predictions.

    The model is switched to eval mode and called without targets, so no
    teacher forcing takes place.

    Args:
        model: Callable as ``model(past_data, context_data)`` returning
            ``(output, extra)``.
        test_loader: Iterable of ``(past_data, target, context_data)`` batches;
            the target element is ignored.

    Returns:
        NumPy array of all batch predictions concatenated along axis 0.
    """
    def _predict(sample):
        history, _unused_target, context = sample
        prediction, _unused_extra = model(history, context)
        return prediction.cpu().numpy()

    model.eval()
    with torch.no_grad():
        per_batch_preds = [_predict(sample) for sample in test_loader]
    return np.concatenate(per_batch_preds, axis=0)

# Example usage

# Autoencoder dimensions
input_dim = 75
encoding_dim = 10
hidden_dims = [64, 32]

# Decoder input/output dimensions
context_size = 10
output_size = 10

# Width of each stage
conv_hidden_size = 64
lstm_hidden_size = 128
transformer_hidden_size = 256

# Depth, heads, kernel size and regularisation
lstm_num_layers = 2
transformer_num_layers = 2
transformer_num_heads = 4
conv_kernel_size = 3
dropout = 0.5

# Keyword arguments keep the 13-parameter constructor call unambiguous.
model = HybridForecastingModel(
    input_dim=input_dim,
    encoding_dim=encoding_dim,
    hidden_dims=hidden_dims,
    conv_hidden_size=conv_hidden_size,
    lstm_hidden_size=lstm_hidden_size,
    transformer_hidden_size=transformer_hidden_size,
    context_size=context_size,
    output_size=output_size,
    lstm_num_layers=lstm_num_layers,
    transformer_num_layers=transformer_num_layers,
    transformer_num_heads=transformer_num_heads,
    conv_kernel_size=conv_kernel_size,
    dropout=dropout,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Once train_loader and val_loader are defined (arguments after n_epochs=...
# must also be passed by keyword):
# train_hybrid(model, train_loader, val_loader, n_epochs=50, optimizer=optimizer, criterion=criterion, teacher_forcing_ratio=0.5)

# Once test_loader is defined:
# hybrid_test_preds = evaluate_hybrid(model, test_loader)