More recent models, such as TSMixer, TFT, and NHITS, achieve better accuracy than LSTM in most settings.

In [2]:
# Set file paths
# Path of the cleaned sensor CSV, relative to the notebook's working directory.
csv_path = '../data/selected_sensors2_cleaned.csv'

# Import pipeline module
# NOTE(review): left disabled; the path below looks like a Google Drive mount —
# confirm the location before re-enabling.
# import sys
# sys.path.append('/content/drive/MyDrive/airkaz/')
# from MLForecastPipeline import *

# Load CSV
import pandas as pd
# index_col=0 makes the first CSV column the DataFrame index.
selected_sensors_df = pd.read_csv(csv_path, index_col=0)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt

class AttentiveDilatedRNNCell(nn.Module):
    """GRU cell whose output is blended with the incoming state by a learned gate.

    A scalar gate ``alpha`` in (0, 1) is computed per sample from the current
    input and the GRU's candidate state; the returned state is
    ``alpha * candidate + (1 - alpha) * hidden_state``.

    Args:
        input_size: Number of features in each input step.
        hidden_size: Number of features in the hidden state.
        dilation: Recurrence skip length. It is only stored on the cell; the
            cell itself never reads it, so applying the skip is the caller's job.
    """

    def __init__(self, input_size, hidden_size, dilation=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dilation = dilation

        self.rnn_cell = nn.GRUCell(input_size, hidden_size)
        self.attn = nn.Linear(hidden_size + input_size, 1)

    def forward(self, x_t, hidden_state):
        """Advance the cell by one step.

        Args:
            x_t: Input for the current step, shaped ``(batch, input_size)``.
            hidden_state: State to recur from, shaped ``(batch, hidden_size)``,
                or ``None`` to start from zeros.

        Returns:
            The gated hidden state, shaped ``(batch, hidden_size)``.
        """
        if hidden_state is None:
            # new_zeros inherits dtype *and* device from x_t, so the cell also
            # works after .double()/.half() (torch.zeros would stay float32).
            hidden_state = x_t.new_zeros(x_t.size(0), self.hidden_size)
        candidate = self.rnn_cell(x_t, hidden_state)
        gate = torch.sigmoid(self.attn(torch.cat([x_t, candidate], dim=-1)))
        return gate * candidate + (1 - gate) * hidden_state

class StackedADRNN(nn.Module):
    """Stack of attentive dilated recurrent layers followed by a linear head.

    Each layer consumes the full output sequence of the layer below it. The
    head maps the top layer's state at the last time step to ``output_size``
    values.

    Args:
        input_size: Number of features per time step of the input sequence.
        hidden_size: Hidden size shared by every layer.
        output_size: Number of values produced by the linear head.
        dilations: One recurrence skip length per layer; each must be >= 1.

    Raises:
        ValueError: If any dilation is smaller than 1.
    """

    def __init__(self, input_size, hidden_size, output_size, dilations=(2, 4, 7)):
        super().__init__()
        dilations = tuple(dilations)
        if any(d < 1 for d in dilations):
            raise ValueError(f"dilations must all be >= 1, got {dilations}")
        self.cells = nn.ModuleList([
            AttentiveDilatedRNNCell(input_size if i == 0 else hidden_size, hidden_size, d)
            for i, d in enumerate(dilations)
        ])
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the stack over ``x`` shaped ``(batch, seq_len, features)``.

        Returns:
            Tensor shaped ``(batch, output_size)``.
        """
        _, seq_len, _ = x.size()
        for cell in self.cells:
            states = []
            for t in range(seq_len):
                # Dilated recurrence: step t continues from the state produced
                # `dilation` steps earlier (zeros while no such state exists).
                # Previously the state from t-1 was always used, which made the
                # `dilations` argument a no-op. dilation=1 is unchanged.
                prev = states[t - cell.dilation] if t >= cell.dilation else None
                states.append(cell(x[:, t, :], prev))
            x = torch.stack(states, dim=1)
        return self.fc(x[:, -1, :])

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset, random_split
import optuna
import os

class AttentiveDilatedRNNCell(nn.Module):
    """Single recurrent step: a GRU update mixed with the previous state.

    The mixing weight ``alpha`` is a per-sample sigmoid gate computed from the
    concatenation of the step input and the GRU output. The result is
    ``alpha * gru_output + (1 - alpha) * hidden_state``.

    Args:
        input_size: Feature count of each step input.
        hidden_size: Feature count of the hidden state.
        dilation: Skip length of the recurrence. Only stored here; this cell
            does not use it, so the enclosing model must apply it.
    """

    def __init__(self, input_size, hidden_size, dilation=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dilation = dilation

        self.rnn_cell = nn.GRUCell(input_size, hidden_size)
        self.attn = nn.Linear(hidden_size + input_size, 1)

    def forward(self, x_t, hidden_state):
        """Compute the next hidden state.

        Args:
            x_t: Step input shaped ``(batch, input_size)``.
            hidden_state: Previous state shaped ``(batch, hidden_size)``;
                ``None`` means an all-zero state.

        Returns:
            New hidden state shaped ``(batch, hidden_size)``.
        """
        if hidden_state is None:
            # Match x_t's dtype as well as its device; a plain torch.zeros is
            # float32 and breaks models converted with .double()/.half().
            hidden_state = x_t.new_zeros(x_t.size(0), self.hidden_size)
        h_new = self.rnn_cell(x_t, hidden_state)
        alpha = torch.sigmoid(self.attn(torch.cat([x_t, h_new], dim=-1)))
        return alpha * h_new + (1 - alpha) * hidden_state

class StackedADRNN(nn.Module):
    """Multi-layer attentive dilated RNN with a linear forecasting head.

    Layer ``i`` reads the whole output sequence of layer ``i - 1``. The head
    is applied to the top layer's state at the final time step.

    Args:
        input_size: Number of features per time step of the input sequence.
        hidden_size: Hidden size used by every layer.
        output_size: Number of values emitted by the head.
        dilations: Iterable with one recurrence skip length (>= 1) per layer.

    Raises:
        ValueError: If any dilation is smaller than 1.
    """

    def __init__(self, input_size, hidden_size, output_size, dilations):
        super().__init__()
        dilations = tuple(dilations)
        if any(d < 1 for d in dilations):
            raise ValueError(f"dilations must all be >= 1, got {dilations}")
        self.cells = nn.ModuleList([
            AttentiveDilatedRNNCell(input_size if i == 0 else hidden_size, hidden_size, d)
            for i, d in enumerate(dilations)
        ])
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` shaped ``(batch, seq_len, features)`` to ``(batch, output_size)``."""
        _, seq_len, _ = x.size()
        for cell in self.cells:
            layer_states = []
            for t in range(seq_len):
                # Recur from the state `dilation` steps back; before that many
                # steps exist the cell starts from zeros. The earlier code
                # always used the t-1 state, so every `dilations` setting
                # produced the same network. dilation=1 behaves as before.
                if t >= cell.dilation:
                    prev = layer_states[t - cell.dilation]
                else:
                    prev = None
                layer_states.append(cell(x[:, t, :], prev))
            x = torch.stack(layer_states, dim=1)
        return self.fc(x[:, -1, :])

def prepare_dataset(df, input_size, horizon):
    """Build a sliding-window supervised dataset from ``df['y_scaled']``.

    Window ``k`` uses rows ``k .. k + input_size - 1`` as input and the next
    ``horizon`` rows as target, so windows are in chronological order with a
    stride of one row.

    Args:
        df: DataFrame with a numeric ``'y_scaled'`` column.
        input_size: Number of past rows in each input window.
        horizon: Number of future rows in each target.

    Returns:
        ``TensorDataset`` of float32 tensors ``X`` shaped
        ``(n_windows, input_size, 1)`` and ``y`` shaped ``(n_windows, horizon)``.
        ``n_windows`` is 0 when the series is shorter than
        ``input_size + horizon``.
    """
    # Pull the column out once instead of on every loop iteration.
    values = df['y_scaled'].to_numpy(dtype=np.float32)
    # "+ 1" keeps the final window, whose target ends on the last row; the
    # previous range() bound silently dropped it.
    n_windows = max(len(values) - input_size - horizon + 1, 0)
    starts = np.arange(n_windows)[:, None]
    inputs = values[starts + np.arange(input_size)]
    targets = values[starts + input_size + np.arange(horizon)]
    X = torch.from_numpy(inputs).unsqueeze(-1)
    y = torch.from_numpy(targets)
    return TensorDataset(X, y)

def forecast_direct(model, series, input_size, horizon):
    """Forecast ``series`` in consecutive, non-overlapping blocks.

    Starting at index 0, each window of ``input_size`` values is fed to
    ``model`` and its output is appended to the result; the window then
    advances by ``horizon``. Windows are only used while a full
    ``input_size + horizon`` span still fits inside ``series``.

    The model is switched to eval mode and left in it.

    Args:
        model: Module called with a ``(1, input_size, 1)`` float32 tensor.
        series: 1-D sequence of values (list or array), already scaled the way
            the model expects.
        input_size: Number of values in each input window.
        horizon: Step between consecutive windows; must be >= 1.

    Returns:
        1-D NumPy array with all model outputs concatenated in order (empty
        when ``series`` is too short for a single window).
    """
    model.eval()
    # Run on whatever device the model lives on; parameter-free modules
    # default to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    chunks = []
    last_start = len(series) - input_size - horizon
    for start in range(0, last_start + 1, horizon):
        window = series[start:start + input_size]
        window_tensor = torch.tensor(
            np.array(window), dtype=torch.float32, device=device
        ).unsqueeze(0).unsqueeze(-1)
        with torch.no_grad():
            # reshape(-1) keeps a 1-D result even for a single output value,
            # where squeeze() would return a 0-d array that cannot be extended.
            forecast = model(window_tensor).reshape(-1).cpu().numpy()
        chunks.append(forecast)
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
df

Unnamed: 0,ds,y,5,6,unique_id,y_scaled
0,2017-03-22,40.683844,39.022917,33.550382,sensor_2,-0.114400
1,2017-03-23,29.237465,24.606322,23.765278,sensor_2,-0.435274
2,2017-03-24,43.675636,32.662021,24.127526,sensor_2,-0.030532
3,2017-03-25,58.792217,73.688502,57.902710,sensor_2,0.393228
4,2017-03-26,48.348401,41.901811,28.811111,sensor_2,0.100459
...,...,...,...,...,...,...
1291,2020-10-03,50.642450,43.423905,28.360000,sensor_2,0.164767
1292,2020-10-04,30.410000,17.520000,21.340000,sensor_2,-0.402404
1293,2020-10-05,29.590000,16.530000,20.040000,sensor_2,-0.425391
1294,2020-10-06,26.380000,13.220000,17.600000,sensor_2,-0.515377


In [None]:
import os

# Optuna hyper-parameter search for StackedADRNN at several forecast horizons,
# followed by a final fit on train+validation and a MAPE evaluation on the
# held-out tail of the series. Results are written to
# results/run_11/adrnn_results.csv.
df = pd.read_csv('../data/selected_sensors2_cleaned.csv', index_col=0)
df = df.rename(columns={'full_date': 'ds', '2': 'y'})
df['ds'] = pd.to_datetime(df['ds'])
df['unique_id'] = 'sensor_2'
NUM_TRIALS = 20
TRAIN_SIZE = 0.6
VAL_SIZE = 0.2
TEST_SIZE = 0.2

# NOTE(review): the scaler is fitted on the whole series, so validation/test
# statistics influence the scaling. Left unchanged because other cells read
# df['y_scaled']; consider fitting on the training rows only.
scaler = StandardScaler()
df['y_scaled'] = scaler.fit_transform(df[['y']])
horizons = [7, 14, 30, 60, 90, 180, 365]
total_trials = len(horizons) * NUM_TRIALS  # NUM_TRIALS Optuna trials per horizon
print(f"Total combinations to explore: {total_trials}\n")

results_summary = {}


def _chronological_split(dataset):
    """Split a window dataset into train/val/test in time order.

    prepare_dataset emits windows in chronological order, so contiguous index
    ranges keep the future out of training. A random split would scatter
    near-identical overlapping windows across all three sets, and it would be
    re-drawn on every call, so "test" windows would differ between the search
    and the final fit.

    NOTE(review): windows right at a boundary still share rows with the
    neighbouring segment; purge `horizon - 1` windows there if strict
    separation is required.
    """
    n_windows = len(dataset)
    n_train = int(n_windows * TRAIN_SIZE)
    n_val = int(n_windows * VAL_SIZE)
    train_part = torch.utils.data.Subset(dataset, range(0, n_train))
    val_part = torch.utils.data.Subset(dataset, range(n_train, n_train + n_val))
    test_part = torch.utils.data.Subset(dataset, range(n_train + n_val, n_windows))
    return train_part, val_part, test_part


for horizon in horizons:
    print(f"=== Horizon: {horizon} ===")

    def objective(trial):
        input_size = trial.suggest_int("input_size", min(30, horizon), min(365, horizon * 2))
        hidden_size = trial.suggest_categorical("hidden_size", [32, 64, 128, 256])
        learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
        dilations = trial.suggest_categorical("dilations", [(1, 2, 4), (2, 4, 7), (4, 8, 16)])

        dataset = prepare_dataset(df, input_size, horizon)
        train_ds, val_ds, _ = _chronological_split(dataset)
        # Shuffle only the order in which training windows are visited; the
        # split itself stays chronological.
        train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=64)

        model = StackedADRNN(input_size=1, hidden_size=hidden_size, output_size=horizon, dilations=dilations)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.SmoothL1Loss()

        best_val_loss = float('inf')
        patience = 8
        trigger_times = 0
        best_epoch = 0

        for epoch in range(75):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                preds = model(xb)
                loss = criterion(preds, yb)
                loss.backward()
                optimizer.step()

            model.eval()
            val_losses = []
            with torch.no_grad():
                for xb, yb in val_loader:
                    preds = model(xb)
                    loss = criterion(preds, yb)
                    val_losses.append(loss.item())
            avg_val_loss = np.mean(val_losses)

            # Early stopping: give up after `patience` epochs without a new
            # best validation loss.
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                trigger_times = 0
                best_epoch = epoch
            else:
                trigger_times += 1
                if trigger_times >= patience:
                    break
        # Remembered so the final fit can train for the same number of epochs.
        trial.set_user_attr("best_epoch", best_epoch)

        return best_val_loss

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=NUM_TRIALS)

    best_params = study.best_trial.params
    results_summary[horizon] = {
        "loss": study.best_value,
        "params": best_params,
        'epoch': study.best_trial.user_attrs["best_epoch"]
    }
    print(f"Best Loss for Horizon {horizon}: {study.best_value:.4f}")
    print(f"Best Params: {best_params}\n")

print("=== Summary of Best Results per Horizon ===")
all_results = []
for h, res in results_summary.items():
    print(f"Horizon {h} → Loss: {res['loss']:.4f}, Params: {res['params']}")

    # Refit with the best hyper-parameters on train + validation.
    best_params = res['params']
    input_size = best_params['input_size']
    hidden_size = best_params['hidden_size']
    dilations = best_params['dilations']
    lr = best_params['lr']
    best_epoch = res['epoch']

    dataset = prepare_dataset(df, input_size, h)
    train_ds, val_ds, test_ds = _chronological_split(dataset)

    combined_ds = torch.utils.data.ConcatDataset([train_ds, val_ds])
    combined_loader = DataLoader(combined_ds, batch_size=64, shuffle=True)

    model = StackedADRNN(input_size=1, hidden_size=hidden_size, output_size=h, dilations=dilations)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.SmoothL1Loss()

    for epoch in range(best_epoch+1):
        model.train()
        for xb, yb in combined_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

    # Window k of the dataset starts at row k of df, so the first test window
    # starts at row len(train) + len(val). Evaluate on the contiguous tail
    # from there instead of stitching (overlapping) window targets together.
    test_start = len(train_ds) + len(val_ds)
    # 'y_scaled' is already standardized; it must not be scaled a second time.
    test_series_scaled = df['y_scaled'].to_numpy()[test_start:]

    # Forecast on test set using best model
    preds_scaled = forecast_direct(model, test_series_scaled, input_size, h)
    preds = scaler.inverse_transform(preds_scaled.reshape(-1, 1)).ravel()
    # Ground truth in original units, aligned with the forecasts, so MAPE
    # compares like with like.
    first_target = test_start + input_size
    actual = df['y'].to_numpy()[first_target:first_target + len(preds)]

    # Evaluate for different test lengths
    max_test_length = len(preds)
    test_lengths = list(range(30, 181, 30)) + [240, 300, 360, 480, 600, 720, max_test_length]
    test_lengths = [t for t in test_lengths if t <= max_test_length]

    error_dict = {}
    for l in test_lengths:
        error_dict[f"MAPE_{l}d"] = mean_absolute_percentage_error(actual[:l], preds[:l])

    all_results.append({
        "Horizon": h,
        "input_size": input_size,
        "hidden_size": hidden_size,
        "lr": lr,
        "dilations": str(dilations),
        **error_dict
    })
# Save to CSV
results_df = pd.DataFrame(all_results)
# to_csv does not create missing directories.
os.makedirs("results/run_11", exist_ok=True)
results_df.to_csv("results/run_11/adrnn_results.csv", index=False)
print("\nEvaluation results saved to adrnn_results.csv")


In [8]:
def run_for_horizon(h, df, scaler, num_trials=20):
    """Tune, refit and evaluate a StackedADRNN for one forecast horizon.

    Steps: (1) Optuna search minimizing validation SmoothL1 loss with early
    stopping, (2) refit on train + validation for the best trial's epoch
    count, (3) block-wise forecast over the held-out tail of the series and
    MAPE in original units for several evaluation lengths.

    Reads the module-level split fractions ``TRAIN_SIZE`` and ``VAL_SIZE``;
    the remaining windows form the test set.

    Args:
        h: Forecast horizon (number of steps predicted per window).
        df: DataFrame with ``'y'`` (original units) and ``'y_scaled'`` columns.
        scaler: Fitted scaler whose ``inverse_transform`` maps ``'y_scaled'``
            back to ``'y'`` units.
        num_trials: Number of Optuna trials.

    Returns:
        Dict with ``Horizon``, the chosen ``input_size``, ``hidden_size``,
        ``lr``, ``dilations`` (as a string), ``best_epoch`` and one
        ``MAPE_<n>d`` entry per evaluated test length.
    """
    print(f"=== Horizon: {h} ===")

    def split_chronologically(dataset):
        # prepare_dataset emits windows in time order, so contiguous index
        # ranges keep future data out of training. A random split would mix
        # near-identical overlapping windows across the sets and would be
        # re-drawn between the search and the final fit.
        # NOTE(review): windows at a boundary still share rows with the next
        # segment; purge `h - 1` windows there if strict separation is needed.
        n_windows = len(dataset)
        n_train = int(n_windows * TRAIN_SIZE)
        n_val = int(n_windows * VAL_SIZE)
        return (
            torch.utils.data.Subset(dataset, range(0, n_train)),
            torch.utils.data.Subset(dataset, range(n_train, n_train + n_val)),
            torch.utils.data.Subset(dataset, range(n_train + n_val, n_windows)),
        )

    def objective(trial):
        input_size = trial.suggest_int("input_size", min(30, h), min(365, h * 2))
        hidden_size = trial.suggest_categorical("hidden_size", [32, 64, 128, 256])
        learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
        dilations = trial.suggest_categorical("dilations", [(1, 2, 4), (2, 4, 7), (4, 8, 16)])

        dataset = prepare_dataset(df, input_size, h)
        train_ds, val_ds, _ = split_chronologically(dataset)
        # Shuffle only the visiting order of training windows; the split
        # itself stays chronological.
        train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=64)

        model = StackedADRNN(input_size=1, hidden_size=hidden_size, output_size=h, dilations=dilations)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.SmoothL1Loss()

        best_val_loss = float('inf')
        patience = 8
        trigger_times = 0
        best_epoch = 0

        for epoch in range(75):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                preds = model(xb)
                loss = criterion(preds, yb)
                loss.backward()
                optimizer.step()

            model.eval()
            val_losses = []
            with torch.no_grad():
                for xb, yb in val_loader:
                    preds = model(xb)
                    loss = criterion(preds, yb)
                    val_losses.append(loss.item())
            avg_val_loss = np.mean(val_losses)

            # Early stopping after `patience` epochs without improvement.
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                trigger_times = 0
                best_epoch = epoch
            else:
                trigger_times += 1
                if trigger_times >= patience:
                    break
        # Remembered so the final fit trains for the same number of epochs.
        trial.set_user_attr("best_epoch", best_epoch)
        return best_val_loss

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=num_trials)

    best_params = study.best_trial.params
    best_epoch = study.best_trial.user_attrs["best_epoch"]

    # Retrain final model
    input_size = best_params['input_size']
    hidden_size = best_params['hidden_size']
    dilations = best_params['dilations']
    lr = best_params['lr']

    dataset = prepare_dataset(df, input_size, h)
    train_ds, val_ds, test_ds = split_chronologically(dataset)
    combined_ds = torch.utils.data.ConcatDataset([train_ds, val_ds])
    combined_loader = DataLoader(combined_ds, batch_size=64, shuffle=True)

    model = StackedADRNN(input_size=1, hidden_size=hidden_size, output_size=h, dilations=dilations)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.SmoothL1Loss()

    for epoch in range(best_epoch + 1):
        model.train()
        for xb, yb in combined_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

    # Forecast
    # Window k starts at row k of df, so the first test window starts at row
    # len(train) + len(val). Use the contiguous tail from there rather than
    # stitching overlapping window targets together.
    test_start = len(train_ds) + len(val_ds)
    # 'y_scaled' is already standardized; do not scale it again.
    test_series_scaled = df['y_scaled'].to_numpy()[test_start:]

    preds_scaled = forecast_direct(model, test_series_scaled, input_size, h)
    preds = scaler.inverse_transform(preds_scaled.reshape(-1, 1)).ravel()
    # Ground truth in original units, aligned with the forecasts.
    first_target = test_start + input_size
    actual = df['y'].to_numpy()[first_target:first_target + len(preds)]

    max_test_length = len(preds)
    test_lengths = list(range(30, 181, 30)) + [240, 300, 360, 480, 600, 720, max_test_length]
    test_lengths = [t for t in test_lengths if t <= max_test_length]

    error_dict = {}
    for l in test_lengths:
        error_dict[f"MAPE_{l}d"] = mean_absolute_percentage_error(actual[:l], preds[:l])

    return {
        "Horizon": h,
        "input_size": input_size,
        "hidden_size": hidden_size,
        "lr": lr,
        "dilations": str(dilations),
        "best_epoch": best_epoch,
        **error_dict
    }


In [None]:
import os

from joblib import Parallel, delayed
from sklearn.preprocessing import StandardScaler

# Load data
df = pd.read_csv('../data/selected_sensors2_cleaned.csv', index_col=0)
df = df.rename(columns={'full_date': 'ds', '2': 'y'})
df['ds'] = pd.to_datetime(df['ds'])
df['unique_id'] = 'sensor_2'
scaler = StandardScaler()
df['y_scaled'] = scaler.fit_transform(df[['y']])

horizons = [7, 14, 30, 60, 90, 180, 365]
NUM_TRIALS = 20

# run_for_horizon reads these split fractions as globals. Define them here so
# this cell works without first running the sequential search cell.
TRAIN_SIZE = 0.6
VAL_SIZE = 0.2
TEST_SIZE = 0.2

# Run in parallel
# One worker per horizon, but never more workers than CPUs: each worker trains
# a torch model, so oversubscribing cores only slows every job down.
n_jobs = min(len(horizons), os.cpu_count() or 1)
results = Parallel(n_jobs=n_jobs)(delayed(run_for_horizon)(h, df, scaler, NUM_TRIALS) for h in horizons)

# Save
results_df = pd.DataFrame(results)
# to_csv does not create missing directories.
os.makedirs("results/run_11", exist_ok=True)
results_df.to_csv("results/run_11/adrnn_parallel_results.csv", index=False)
print("✅ Results saved to results/run_11/adrnn_parallel_results.csv")
