In [14]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.preprocessing import StandardScaler
import random
import matplotlib.pyplot as plt
from data import create_dataset_splits

# Build the dataset splits for country code "IT" with the helper imported from the
# `data` module. The call returns two objects, bound here as `train_df` and
# `forecast_df` (presumably pandas DataFrames — TODO confirm against data.py).
train_df, forecast_df = create_dataset_splits(
  data_dir=r"datasets2025",
  processed_data_dir=r"processed",
  country_code="IT"
)

# Quick sanity check: report the dimensions of the training split.
print("Shape: ", train_df.shape)

Shape:  (22632, 3399)


In [15]:
# Boolean mask over the columns: True for every column whose name begins with the
# metering-data prefix. The masked columns are the ones used as targets below.
y_cols = [col.startswith('VALUEMWHMETERINGDATA') for col in train_df.columns]

# Features: every row except the last one.
X = train_df.iloc[:-1].copy()
# Targets: the masked columns moved up by one row, so row t of Y holds the values
# found at row t + 1 of train_df; the final row (empty after the shift) is dropped.
Y = train_df.loc[:, y_cols].shift(-1).iloc[:-1].copy()

# Neither frame may contain missing values.
assert not X.isna().any().any()
assert not Y.isna().any().any()

In [16]:
# Convert the row index to datetimes so calendar fields can be read from it.
X.index = pd.to_datetime(X.index)

# Calendar components of every timestamp in the index.
month, weekday, hour = X.index.month, X.index.weekday, X.index.hour

# Encode each component as a (sin, cos) pair over its period (12 months,
# 7 weekdays, 24 hours) so that the end of a cycle sits next to its start.
X['month_sin'] = np.sin(2 * np.pi * month / 12)
X['month_cos'] = np.cos(2 * np.pi * month / 12)
X['day_sin'] = np.sin(2 * np.pi * weekday / 7)
X['day_cos'] = np.cos(2 * np.pi * weekday / 7)
X['hour_sin'] = np.sin(2 * np.pi * hour / 24)
X['hour_cos'] = np.cos(2 * np.pi * hour / 24)

# Names of the cyclical columns added above, in insertion order.
cols_cat = ['month_sin', 'month_cos', 'day_sin', 'day_cos', 'hour_sin', 'hour_cos']

In [17]:
# Set the seed for reproducibility
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator and all CUDA devices), then sets the cuDNN backend flags
    ``deterministic=True`` and ``benchmark=False``.

    Args:
        seed: Seed value passed unchanged to each library.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags: turn off auto-tuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

def plot_loss(train_losses, val_losses):
    """Plot the training and validation loss curves side by side.

    Creates a 1x2 figure: the left panel shows ``train_losses`` and the right
    panel shows ``val_losses``, one marker per entry. Each panel carries its
    own title and legend label so the two curves cannot be confused.

    Args:
        train_losses: Sequence of per-epoch training losses.
        val_losses: Sequence of per-epoch validation losses.

    Returns:
        None. The figure is created through ``plt.subplots``.
    """
    _, axes = plt.subplots(1, 2, figsize=(20, 6))  # 1 row, 2 columns

    # One entry per panel: (axis, data, legend label, title).
    panels = (
        (axes[0], train_losses, 'training', 'Loss evolution (train loss alone)'),
        (axes[1], val_losses, 'validation', 'Loss evolution (val loss alone)'),
    )
    for ax, losses, label, title in panels:
        ax.set_title(title)
        ax.plot(losses, label=label, marker='o')
        ax.set_ylabel('Loss')
        ax.set_xlabel('Epochs')
        ax.legend()

In [20]:
from xlstm.xlstm_large import xLSTMLarge as xLSTMModel

In [None]:
# Fix all random number generators before building the loaders and the model.
seed = 1  # You can choose any integer value
set_seed(seed)

# NOTE(review): no train/validation split is performed in this cell — the whole of
# X / Y is used for training. An 80% / 20% chronological split was apparently
# intended; it has to be added before the validation code can be re-enabled.

# Standardize the data: one scaler for the features, one for the targets.
scaler_X = StandardScaler()
scaler_Y = StandardScaler()

# Fit the scalers on the data and transform it. The cyclical calendar columns listed
# in `cols_cat` are left out of the scaling and appended unscaled afterwards.
X_train = scaler_X.fit_transform(X.drop(cols_cat, axis=1))
Y_train = scaler_Y.fit_transform(Y)

# Unscaled cyclical columns.
X_train_cat = X[cols_cat]

# Final feature matrix: scaled columns first, then the cyclical columns.
X_train = np.concatenate([X_train, X_train_cat], axis=1)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)

# Create DataLoader for batching
batch_size = 200

train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)

# Batches are drawn in shuffled order on every pass over the dataset.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model parameters
input_dim = X_train.shape[1]  # Number of feature columns (scaled + cyclical)
hidden_dim = 168  # Increased hidden dimension for better capacity # (24,168)
output_dim = Y_train.shape[1]  # Number of target columns
num_layers = 3  # Deeper LSTM
dropout = 0.3  # Dropout to prevent overfitting
max_grad_norm = 5.0  # Maximum gradient norm for clipping

# Notes from earlier experiments:
# dropout = 0.5: BAD
# batch_size = 300: BAD

# Initialize the model, loss function, and optimizer
# NOTE(review): `xLSTMModel` is an alias for `xlstm.xlstm_large.xLSTMLarge`, and this
# call fails with "TypeError: xLSTMLarge.__init__() takes 2 positional arguments but
# 6 were given" (see the cell output). The constructor accepts a single argument
# besides `self`; what that argument must be is not visible in this notebook —
# TODO check the xlstm package and adapt the call (or use a different model class).
model = xLSTMModel(input_dim, hidden_dim, output_dim, num_layers, dropout)
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Early stopping parameters
# NOTE(review): only `num_epochs` is used by the active code of the training loop;
# the other values are referenced solely by the commented-out early-stopping logic.
num_epochs = 500
best_val_loss = float('inf')
patience = 25
epochs_no_improve = 0
best_model_path = "best_lstm_model.pth"

# Per-epoch loss histories (`val_losses` stays empty while validation is disabled).
train_losses = []
val_losses = []

# Training loop. Validation and early stopping are currently disabled (this notebook
# builds no validation loader), so all `num_epochs` epochs are always run.
for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0
    for X_batch, Y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch.unsqueeze(1))  # Add sequence dimension
        loss = criterion(outputs, Y_batch)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    # Validation phase (disabled: needs a `val_loader`, which is not defined yet)
    # model.eval()
    # val_loss = 0
    # with torch.no_grad():
    #     for X_batch, Y_batch in val_loader:
    #         outputs = model(X_batch.unsqueeze(1))  # Add sequence dimension
    #         loss = criterion(outputs, Y_batch)
    #         val_loss += loss.item()

    # val_loss /= len(val_loader)
    # val_losses.append(val_loss)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}")

    # Early stopping logic (disabled: depends on `val_loss` from the block above)
    # if val_loss < best_val_loss:
    #     best_val_loss = val_loss
    #     epochs_no_improve = 0
    #     torch.save(model.state_dict(), best_model_path)  # Save the best model
    # else:
    #     epochs_no_improve += 1
    #     if epochs_no_improve >= patience:
    #         print("Early stopping triggered!")
    #         break

# No checkpoint is written while validation is disabled, so there is nothing to load;
# the model keeps the weights of the last epoch. Report that truthfully instead of
# claiming that a best model was restored.
# model.load_state_dict(torch.load(best_model_path))
print("Training finished; no best-model checkpoint was loaded (validation is disabled).")
# plot_loss(train_losses, val_losses)

TypeError: xLSTMLarge.__init__() takes 2 positional arguments but 6 were given