In [2]:
from transformers import AutoformerConfig, AutoformerModel
import os
import sys
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path, models_path
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoformerConfig, AutoformerForPrediction
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
data_file = os.path.join(univariate_data_path, 'merged_univariate.npy')
data = np.load(data_file, allow_pickle=True)

In [4]:
lengths = [len(record['signal']) for record in data]

# Compute and print stats
min_len = np.min(lengths)
max_len = np.max(lengths)
avg_len = np.mean(lengths)

print(f"Shortest sequence: {min_len} samples ({min_len / 20 / 60:.2f} minutes)")
print(f"Longest sequence: {max_len} samples ({max_len / 20 / 60:.2f} minutes)")
print(f"Average length: {avg_len:.0f} samples ({avg_len / 20 / 60:.2f} minutes)")

# Count how many sequences are shorter than 10 minutes (10 * 60 * 20 = 12,000 samples)
too_short = sum(len(record['signal']) < 12000 for record in data)
print(data[0]['signal'])
print(f"Number of sequences shorter than 10 minutes: {too_short}")
print(f"Total number of sequences: {len(data)}")
print(f"Percentage too short: {100 * too_short / len(data):.2f}%")


Shortest sequence: 17 samples (0.01 minutes)
Longest sequence: 100200 samples (83.50 minutes)
Average length: 35766 samples (29.80 minutes)
[[-1.7358303 ]
 [-0.30347557]
 [-0.40749874]
 ...
 [-3.09738299]
 [-2.90981482]
 [-3.22768386]]
Number of sequences shorter than 10 minutes: 83
Total number of sequences: 666
Percentage too short: 12.46%


In [5]:
# Keep only signals that are >= 10 minutes (12000 samples at 20Hz)
filtered_data = [record for record in data if len(record['signal']) >= 12000]

print(f"Filtered dataset size: {len(filtered_data)} (out of {len(data)})")


Filtered dataset size: 583 (out of 666)


In [6]:
train_data, temp_data = train_test_split(filtered_data, test_size=0.2, shuffle=True, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, shuffle=True, random_state=42)


In [7]:
# Create a Forecasting Dataset
class ForecastingDataset(Dataset):
    def __init__(self, data, input_window=12000, forecast_horizon=1200, stride=6000):
        self.samples = []
        for record in data:
            signal = record['signal']
            total_len = input_window + forecast_horizon
            for start in range(0, len(signal) - total_len + 1, stride):
                input_seq = signal[start:start + input_window]
                target_seq = signal[start + input_window:start + total_len]
                self.samples.append((input_seq, target_seq))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        input_seq, target_seq = self.samples[idx]

        input_tensor = torch.tensor(input_seq, dtype=torch.float32)
        target_tensor = torch.tensor(target_seq, dtype=torch.float32)

        # Ensure tensors are 3D: (seq_len, num_features)
        if input_tensor.ndim == 1:
            input_tensor = input_tensor.unsqueeze(-1)
        if target_tensor.ndim == 1:
            target_tensor = target_tensor.unsqueeze(-1)

        return input_tensor, target_tensor

train_dataset = ForecastingDataset(
    data=train_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

val_dataset = ForecastingDataset(
    data=val_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

test_dataset = ForecastingDataset(
    data=test_data,
    input_window=12000,
    forecast_horizon=1200,
    stride=6000
)

# Step 3: Create DataLoaders for Each Dataset
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

# Example of fetching a batch
x, y = next(iter(train_dataloader))

print(x.shape)  # (batch_size, input_window, 1)
print(y.shape)  # (batch_size, forecast_horizon, 1)

torch.Size([16, 12000, 1])
torch.Size([16, 1200, 1])


In [8]:
# Initialize AutoformerConfig & Autoformer model

from transformers import AutoformerConfig

# Define the configuration for the Autoformer model
config = AutoformerConfig(
    input_size=1,
    input_length=12000,
    output_length=1200,
    prediction_length=1200,
    num_hidden_layers=3,
    num_attention_heads=8,
    d_model=64,
    dropout_rate=0.1, 
)

    # num_static_categorical_features=0,
    # num_static_real_features=1, 
from transformers import AutoformerForPrediction

# Initialize the model with the configuration
model = AutoformerForPrediction(config)


In [9]:
# import torch
# import torch.nn as nn

# class AutoformerForForecasting(nn.Module):
#     def __init__(self, model, output_size):
#         super(AutoformerForForecasting, self).__init__()
#         self.autoformer = model
#         self.output_layer = nn.Linear(self.autoformer.config.hidden_size, output_size)

#     def forward(self, past_values):
#         """
#         Forward pass through Autoformer.

#         Parameters:
#         - past_values: (batch_size, seq_len, input_size) Time series data

#         Returns:
#         - forecast: (batch_size, output_size) Forecasted values
#         """
#         if past_time_features is not None and past_observed_mask is not None:
#             # Handle the time features and observed mask here
#             # For example, add them to your input processing layers or combine them with past_values
#             pass
#         # Print the input shape for debugging
#         print(f"past_values.shape: {past_values.shape}")
        
#         # Pass the past_values through the Autoformer model
#         encoder_output = self.autoformer(past_values)[0]  # (batch_size, seq_len, hidden_size)
        
#         # Use the last hidden state for forecasting (last time step)
#         forecast = self.output_layer(encoder_output[:, -1, :])  # (batch_size, output_size)
#         return forecast

# model = AutoformerForForecasting(base_model, output_size=1200)  # Output size is the forecast horizon
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

In [10]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.nn import MSELoss

# Define the training loop
def train(model, dataloader, epochs, loss_fn, optimizer, device):
    model.train()  # Set the model to training mode
    model.to(device)  # Move the model to the GPU or CPU

    for epoch in range(epochs):
        epoch_loss = 0
        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)  # Move data to the same device as the model
            
            batch_size = x.size(0)
            sequence_length = x.size(1)
           

            print(f"x.shape: {x.shape}")  # Should be (batch_size, input_window, 1)
            print(f"y.shape: {y.shape}")  # Should be (batch_size, forecast_horizon, 1)

            # Create dummy past_time_features with shape (batch_size, input_window, 1)
            # Ensure the input dimensions are correct and consistent
            past_time_features = torch.zeros(batch_size, sequence_length, 1, device=device)  # Dummy tensor with the same shape as x
            # past_observed_mask shape should be [batch_size, sequence_length, 1]
            # past_observed_mask = torch.ones(batch_size, sequence_length, 1, dtype=torch.bool, device=device)  # Shape: [16, 12000, 1]
            past_observed_mask = torch.ones_like(x).int()  # Shape: [16, 12000, 1]

            # past_time_features = past_time_features.squeeze(-1)  # Remove the singleton dimension
            # past_observed_mask = past_observed_mask.squeeze(-1)  # Remove the singleton dimension
            # past_time_features = 

            # Print shape of tensors at the point of error
            print("past_values:", x.shape)
            print("past_time_features:", past_time_features.shape)
            print("past_observed_mask:", past_observed_mask.shape)
            batch_size = x.shape[0]
            device = x.device
            empty_static_cat = torch.empty(x.shape[0], 1, 0, dtype=torch.long, device=x.device)  # Empty categorical features (with batch size)
            empty_static_real = torch.empty(x.shape[0], 1, 1, dtype=torch.float32, device=x.device)
            # Trace the part of the model where the error occurs and inspect the expansion logic.
            # If needed, inspect the `expand` and `unsqueeze` operations inside the model to make sure they are applied correctly.

            # Forward pass: Model should forecast the next 1200 points (forecast_horizon)
            forecast = model(past_values=x, past_time_features=past_time_features, past_observed_mask=past_observed_mask)


            # Ensure forecast size is (batch_size, forecast_horizon, 1) and matches y
            assert forecast.shape == y.shape, f"Forecast shape: {forecast.shape}, y shape: {y.shape}"
            print("distribution shape:", distribution.shape)
            print("future_values shape:", future_values.shape)

            # Calculate loss
            loss = loss_fn(forecast, y)


            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Print loss at each epoch
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(dataloader)}")

# Instantiate the loss function and optimizer
loss_fn = MSELoss()  # Mean Squared Error Loss for regression tasks
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Initialize device (GPU or CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the training loop
train(model, train_dataloader, epochs=10, loss_fn=loss_fn, optimizer=optimizer, device=device)


x.shape: torch.Size([16, 12000, 1])
y.shape: torch.Size([16, 1200, 1])
past_values: torch.Size([16, 12000, 1])
past_time_features: torch.Size([16, 12000, 1])
past_observed_mask: torch.Size([16, 12000, 1])


RuntimeError: expand(torch.FloatTensor{[16, 1, 2, 1]}, size=[-1, 11993, -1]): the number of sizes provided (3) must be greater or equal to the number of dimensions in the tensor (4)

In [70]:
# Save the trained model
save_path = os.path.join(models_path, "autoformer_forecasting_model.pth")
torch.save(model.state_dict(), save_path)
