1. Designing AI-Powered Emissions Anomaly Detection

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the LSTM Autoencoder
class LSTMAutoencoder(nn.Module):
    """Sequence-to-sequence LSTM autoencoder.

    The encoder LSTM summarizes the input sequence into its final hidden and
    cell states. The decoder LSTM is initialized with those states and is fed
    the top encoder layer's final hidden state at every timestep; a linear
    layer then maps each decoder output back to ``input_dim`` features.

    Args:
        input_dim: Number of features per timestep.
        hidden_dim: Size of the LSTM hidden state (the bottleneck width).
        n_layers: Number of stacked LSTM layers in both encoder and decoder.
    """

    def __init__(self, input_dim=5, hidden_dim=32, n_layers=1):
        super().__init__()
        self.encoder = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        self.decoder = nn.LSTM(hidden_dim, hidden_dim, n_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Reconstruct ``x``.

        Args:
            x: Tensor of shape ``(batch_size, seq_len, input_dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        seq_len = x.size(1)

        # Encode: keep only the final (hidden, cell) states, each of shape
        # (n_layers, batch_size, hidden_dim).
        _, (hidden, cell) = self.encoder(x)

        # Feed the decoder the top layer's final hidden state at every
        # timestep. Indexing the last layer (rather than repeating the whole
        # `hidden` tensor) keeps the decoder input at exactly `seq_len` steps
        # for any n_layers; repeating all layers would yield
        # n_layers * seq_len steps.
        decoder_input = hidden[-1].unsqueeze(1).repeat(1, seq_len, 1)

        # Decode, starting from the encoder's final states for every layer.
        decoded_seq, _ = self.decoder(decoder_input, (hidden, cell))

        # Project back to the original input dimension.
        return self.output_layer(decoded_seq)


The encoder reads in the full sequence and compresses it into a final hidden state.

We then use that hidden state (together with the encoder's final cell state) to initialize the decoder, which reconstructs the sequence.

Since the decoder needs one input per timestep, we repeat the hidden state across the sequence length. This is a simple way to decode without using teacher forcing or autoregressive decoding, which can be added later for better results.

The output layer maps the hidden dimension back to the input size.



In [2]:
# Hyperparameters for the demo run
input_dim = 5     # features per timestep
seq_len = 20      # timesteps per sequence
batch_size = 32   # sequences in the single training batch
num_epochs = 10   # full-batch optimization steps

# Simulated time series: one noisy sine wave, copied across every batch row
# and every feature channel.
np.random.seed(42)
time_axis = np.linspace(0, 100, seq_len)
noise = np.random.normal(0, 0.1, seq_len)
sim_data = np.sin(time_axis) + noise
sim_data = np.tile(
    sim_data[np.newaxis, :, np.newaxis],
    (batch_size, 1, input_dim),
)  # shape: (batch, seq_len, input_dim)
train_data = torch.tensor(sim_data, dtype=torch.float32)

# Model, reconstruction loss, and optimizer
model = LSTMAutoencoder(input_dim=input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Full-batch training: each epoch is one gradient step on the whole dataset.
model.train()  # training mode only needs to be set once
for epoch in range(num_epochs):
    optimizer.zero_grad()

    # The autoencoder's target is its own input.
    output = model(train_data)
    loss = criterion(output, train_data)

    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")


Epoch 1/10, Loss: 0.5110
Epoch 2/10, Loss: 0.5095
Epoch 3/10, Loss: 0.5081
Epoch 4/10, Loss: 0.5069
Epoch 5/10, Loss: 0.5057
Epoch 6/10, Loss: 0.5046
Epoch 7/10, Loss: 0.5036
Epoch 8/10, Loss: 0.5027
Epoch 9/10, Loss: 0.5019
Epoch 10/10, Loss: 0.5011


Training Progress:
The model's reconstruction loss (MSE) is decreasing gradually over epochs.
This indicates that the autoencoder is learning to better reconstruct the input time series.
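The drop is consistent, with no sign of divergence, which suggests stable learning; however, it is small (about 2% over 10 epochs, from 0.5110 to 0.5011), so the model is still far from converged. Overfitting cannot be assessed from these numbers alone, because no held-out validation data is evaluated.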
