In [11]:
from transformers import AutoformerConfig, AutoformerModel
import os
import sys
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path, models_path
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split


In [12]:
data_file = os.path.join(univariate_data_path, 'merged_univariate.npy')
data = np.load(data_file, allow_pickle=True)

In [13]:
lengths = [len(record['signal']) for record in data]

# Compute and print stats
min_len = np.min(lengths)
max_len = np.max(lengths)
avg_len = np.mean(lengths)

print(f"Shortest sequence: {min_len} samples ({min_len / 20 / 60:.2f} minutes)")
print(f"Longest sequence: {max_len} samples ({max_len / 20 / 60:.2f} minutes)")
print(f"Average length: {avg_len:.0f} samples ({avg_len / 20 / 60:.2f} minutes)")

# Count how many sequences are shorter than 10 minutes (10 * 60 * 20 = 12,000 samples)
too_short = sum(len(record['signal']) < 12000 for record in data)

print(f"Number of sequences shorter than 10 minutes: {too_short}")
print(f"Total number of sequences: {len(data)}")
print(f"Percentage too short: {100 * too_short / len(data):.2f}%")


Shortest sequence: 1793 samples (1.49 minutes)
Longest sequence: 1002000 samples (835.00 minutes)
Average length: 170784 samples (142.32 minutes)
Number of sequences shorter than 10 minutes: 7
Total number of sequences: 666
Percentage too short: 1.05%


In [14]:
# Keep only signals that are >= 10 minutes (12000 samples at 20Hz)
filtered_data = [record for record in data if len(record['signal']) >= 12000]

print(f"Filtered dataset size: {len(filtered_data)} (out of {len(data)})")


Filtered dataset size: 659 (out of 666)


In [15]:
train_data, temp_data = train_test_split(filtered_data, test_size=0.2, shuffle=True, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, shuffle=True, random_state=42)


In [17]:
# Create a Forecasting Dataset

class ForecastingDataset(Dataset):
    def __init__(self, data, input_window=12000, forecast_horizon=1200, stride=6000):
        self.samples = []
        for record in data:
            signal = record['signal']
            total_len = input_window + forecast_horizon
            for start in range(0, len(signal) - total_len + 1, stride):
                input_seq = signal[start:start + input_window]
                target_seq = signal[start + input_window:start + total_len]
                self.samples.append((input_seq, target_seq))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        input_seq, target_seq = self.samples[idx]
        return (
            torch.tensor(input_seq, dtype=torch.float32).unsqueeze(-1),
            torch.tensor(target_seq, dtype=torch.float32).unsqueeze(-1)
        )

train_dataset = ForecastingDataset(
    data=train_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

val_dataset = ForecastingDataset(
    data=val_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

test_dataset = ForecastingDataset(
    data=test_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

# Step 3: Create DataLoaders for Each Dataset
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

# Example of fetching a batch
x, y = next(iter(train_dataloader))
print(x.shape)  # (batch_size, input_window, 1)
print(y.shape)  # (batch_size, forecast_horizon, 1)

torch.Size([16, 12000, 1])
torch.Size([16, 1200, 1])


In [22]:
# Initialize AutoformerConfig & Autoformer model

config = AutoformerConfig(
    prediction_length=1200,   
    hidden_size=256,
    num_attention_heads=4,
    num_hidden_layers=3,
    intermediate_size=512,
    layer_norm_eps=1e-12,
    max_position_embeddings=12000,
    num_labels=1,
    output_attentions=False,
    output_hidden_states=False,
    num_static_real_features=0,
    num_static_categorical_features=0   
)



# Randomly initializing a model (with random weights) from the configuration
model = AutoformerModel(config)
print(f"Model type: {type(model)}")

print(model)

Model type: <class 'transformers.models.autoformer.modeling_autoformer.AutoformerModel'>
AutoformerModel(
  (scaler): AutoformerMeanScaler()
  (encoder): AutoformerEncoder(
    (value_embedding): AutoformerValueEmbedding(
      (value_projection): Linear(in_features=9, out_features=256, bias=False)
    )
    (embed_positions): AutoformerSinusoidalPositionalEmbedding(2400, 256)
    (layers): ModuleList(
      (0-2): 3 x AutoformerEncoderLayer(
        (self_attn): AutoformerAttention(
          (k_proj): Linear(in_features=256, out_features=256, bias=True)
          (v_proj): Linear(in_features=256, out_features=256, bias=True)
          (q_proj): Linear(in_features=256, out_features=256, bias=True)
          (out_proj): Linear(in_features=256, out_features=256, bias=True)
        )
        (self_attn_layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (activation_fn): GELUActivation()
        (fc1): Linear(in_features=256, out_features=32, bias=True)
        (fc2

In [25]:
# # Add a Forecasting Head

# class AutoformerForForecasting(nn.Module):
#     def __init__(self, model, output_size):
#         super(AutoformerForForecasting, self).__init__()
#         self.autoformer = model
#         self.output_layer = nn.Linear(self.autoformer.config.hidden_size, output_size)

#     def forward(self, past_values, past_time_features=None, past_observed_mask=None, static_real_features=None, static_categorical_features=None):
#         print(f"past_values.shape: {past_values.shape}")
#         print(f"past_time_features.shape: {past_time_features.shape if past_time_features is not None else 'None'}")
#         print(f"past_observed_mask.shape: {past_observed_mask.shape if past_observed_mask is not None else 'None'}")
#         if static_real_features is not None:
#             print(f"static_real_features.shape: {static_real_features.shape}")
#         if static_categorical_features is not None:
#             print(f"static_categorical_features.shape: {static_categorical_features.shape}")
        
#         if self.autoformer.config.num_static_real_features == 0:
#             static_real_features = None
#         if self.autoformer.config.num_static_categorical_features == 0:
#             static_categorical_features = None

#         # Pass through the Autoformer model (only past_values if it's the required input)
#         encoder_output = self.autoformer(past_values)[0]  # (batch_size, seq_len, hidden_size)
        
#         # Use the last hidden state for forecasting
#         forecast = self.output_layer(encoder_output[:, -1, :])  # (batch_size, forecast_horizon)
#         return forecast


class AutoformerForForecasting(nn.Module):
    def __init__(self, model, output_size):
        super(AutoformerForForecasting, self).__init__()
        self.autoformer = model
        self.output_layer = nn.Linear(config.hidden_size, output_size)

    def forward(self, past_values, past_time_features, past_observed_mask, static_real_features=None, static_categorical_features=None):
        # Forward pass through the Autoformer model
        encoder_output = self.autoformer(
            past_values=past_values,
            past_time_features=past_time_features,
            past_observed_mask=past_observed_mask,
            static_real_features=static_real_features,
            static_categorical_features=static_categorical_features
        )[0]  # Output from the encoder (batch_size, seq_len, hidden_size)

        # Use the last hidden state to make the forecast
        forecast = self.output_layer(encoder_output[:, -1, :])  # Output from the last time step
        return forecast



In [26]:
# # Define Loss Function & Optimizer
# # Use MSE and ADAM-optimizer

# # Loss function (Mean Squared Error)
# loss_fn = nn.MSELoss()

# # Optimizer (Adam optimizer with learning rate)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)



In [28]:
# # Training Loop

# def train(model, dataloader, epochs):
#     model.train()
#     for epoch in range(epochs):
#         epoch_loss = 0
#         for batch_idx, (x, y) in enumerate(dataloader):
#             batch_size = x.size(0)
#             print(f"x.shape: {x.shape}")
#             print(f"y.shape: {y.shape}")

#             # Dummy time features and mask
#             past_time_features = torch.zeros_like(x).to(x.device)
#             past_observed_mask = torch.ones_like(x).to(x.device)

#             # print(f"x.shape: {x.shape}")
#             # print(f"past_time_features.shape: {past_time_features.shape}")
#             # print(f"past_observed_mask.shape: {past_observed_mask.shape}")
#             print(f"Model type: {type(model)}")

#             output = model(
#                 past_values=x,
#                 past_time_features=past_time_features,
#                 past_observed_mask=past_observed_mask
#             )
#             forecast = output.prediction

#             loss = loss_fn(forecast, y)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             epoch_loss += loss.item()

#         print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(dataloader)}")

# # Train the model
# train(model, train_dataloader, epochs=10)

import torch.optim as optim
import torch.nn.functional as F

# Loss function
def mse_loss(pred, true):
    return F.mse_loss(pred, true)

# Training Loop
def train_autoformer(model, dataloader, epochs=10, learning_rate=1e-4):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()

    for epoch in range(epochs):
        epoch_loss = 0.0
        for batch_idx, (x, y) in enumerate(dataloader):
            optimizer.zero_grad()
            print(f"Model type: {type(model)}")
            # Pass data through the model
            forecast = model(
                past_values=x, 
                past_time_features=x,  # You can adjust the time features if needed
                past_observed_mask=torch.ones_like(x)  # Modify this as necessary
            )
            
            # Calculate loss
            loss = mse_loss(forecast, y)
            epoch_loss += loss.item()

            # Backpropagation
            loss.backward()
            optimizer.step()

        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(dataloader)}")


# Train the model on the forecasting task
train_autoformer(model, train_dataloader, epochs=10)


Model type: <class 'transformers.models.autoformer.modeling_autoformer.AutoformerModel'>


RuntimeError: expand(torch.FloatTensor{[16, 1, 2, 1]}, size=[-1, 11993, -1]): the number of sizes provided (3) must be greater or equal to the number of dimensions in the tensor (4)

In [None]:
# Save the trained model
save_path = os.path.join(models_path, "autoformer_forecasting_model.pth")
torch.save(model.state_dict(), save_path)
