In [None]:
# Colab-only step: prompts for a file upload in the browser.
# Presumably this is where "merged_energy_weather_data.csv" (read further down) comes from — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a global by the training functions defined later.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
import pandas as pd

# Load the merged energy/weather CSV and give the columns short names.
# NOTE(review): the rename is purely positional — it assumes the CSV has exactly
# these 11 columns in this order; confirm against the file.
df = pd.read_csv("merged_energy_weather_data.csv", parse_dates=["Date"])
df.columns = [
    "date", "price", "temp", "precip", "wind", "humidity",
    "cloud", "radiation", "week_day", "month", "day_month"
]
print(df.shape)

# Chronological order is required before any neighbour-based imputation.
df = df.sort_values("date").reset_index(drop=True)

# Fill gaps in `cloud` by linear interpolation between the surrounding valid
# values. For an isolated missing value this equals the mean of the previous
# and next row; unlike a plain neighbour average it also fills runs of several
# consecutive missing values, so fewer rows are lost to the later dropna().
# limit_area="inside" leaves leading/trailing NaNs untouched (no extrapolation).
df["cloud"] = df["cloud"].interpolate(method="linear", limit_area="inside")
print("Remaining NaNs:\n", df.isna().sum())

In [None]:
# Remove the day-of-month column. errors="ignore" keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is already gone.
df = df.drop(columns=["day_month"], errors="ignore")

# lag_k holds the price from k rows earlier (the frame is sorted by date above);
# the first k rows of each lag column are NaN.
for lag in range(1, 8):
    df[f"lag_{lag}"] = df["price"].shift(lag)
df.head(10)

In [None]:
# Drop every row that still contains a NaN (this includes the first rows, whose
# lag columns are undefined) and renumber the index from 0.
# NOTE(review): any row dropped from the middle of the series leaves a date gap
# that the positional 14-step target windows built later will silently span.
df = df.dropna().reset_index(drop=True)

print(df.shape)
df.head()

**Feedforward NN**

In [None]:
import random
import numpy as np
import torch

def set_seed(seed=123):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with `seed`.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    auto-tuner.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(123)

In [None]:
import numpy as np

forecast_horizon = 14

# Predictors: every column except the timestamp and the target, in sorted name order.
feature_cols = sorted(df.columns.difference(['date', 'price']).tolist())

# Row i of y holds the next `forecast_horizon` prices after row i
# (rows i+1 .. i+forecast_horizon). The final `forecast_horizon` rows have no
# complete future window and therefore produce no target.
y = np.array([
    df['price'].iloc[start + 1 : start + 1 + forecast_horizon].values
    for start in range(len(df) - forecast_horizon)
])

# Keep only the feature rows that have a matching target row.
X = df[feature_cols].values[:len(y)]

In [None]:
def expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, gap=0):
    """Yield expanding-window splits with a single validation sample per fold.

    Each fold trains on rows ``[0, start - gap)`` and validates on row ``start``
    alone; ``start`` begins at ``initial_train_size`` and advances by ``step``
    while it is still a valid row index.

    Parameters
    ----------
    X, y : sliceable sequences of equal length (arrays or lists).
    initial_train_size : int
        Index of the first validation row.
    horizon : int
        Not used by the split itself; kept so existing calls keep working.
    step : int
        Distance between consecutive validation rows. Must be positive.
    gap : int, default 0
        Number of rows immediately before the validation row that are left out
        of the training split. With the default 0 the split is unchanged from
        the historical behaviour.
        NOTE(review): in this notebook row i of ``y`` holds prices i+1..i+14, so
        with ``gap=0`` the last training targets overlap the validation target
        window (look-ahead leakage). Passing ``gap=horizon`` removes that overlap.

    Yields
    ------
    (X_train, y_train, X_val, y_val)

    Raises
    ------
    ValueError
        If ``step`` is not positive (the split would otherwise never advance)
        or ``gap`` is negative.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    if gap < 0:
        raise ValueError("gap must be non-negative")

    n_samples = len(X)
    for start in range(initial_train_size, n_samples, step):
        train_end = max(start - gap, 0)
        # Single validation sample: one multi-step forecast per fold.
        yield X[:train_end], y[:train_end], X[start:start + 1], y[start:start + 1]

In [None]:
# Sanity check: list the train/validation shapes produced by every fold.
for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14)
):
    print(f"Fold {fold+1}")
    print(f"  Train shape: {X_tr.shape}, {y_tr.shape}")
    print(f"  Val shape:   {X_val.shape}, {y_val.shape}")

In [None]:
from sklearn.preprocessing import StandardScaler

# Demonstrate the scaling step on the first fold only (default split parameters).
X_train, y_train, X_val, y_val = next(expanding_window_cv(X, y))

# Separate scalers for features and targets.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

# Fit on the training split only; the validation split reuses the training statistics.
X_train_scaled = scaler_X.fit_transform(X_train)
X_val_scaled = scaler_X.transform(X_val)

# Each target column (one per forecast step) is standardised independently.
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled = scaler_y.transform(y_val)

print("Scaled shapes:")
print("  X_train_scaled:", X_train_scaled.shape)
print("  X_val_scaled:  ", X_val_scaled.shape)
print("  y_train_scaled:", y_train_scaled.shape)
print("  y_val_scaled:  ", y_val_scaled.shape)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class PriceForecastDataset(Dataset):
    """Dataset that pairs each feature row with the target row at the same index."""

    def __init__(self, X, y):
        # Both inputs are copied into float32 tensors once, at construction time.
        float32 = torch.float32
        self.X, self.y = torch.tensor(X, dtype=float32), torch.tensor(y, dtype=float32)

    def __len__(self):
        # The number of samples is the length of the feature tensor.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [None]:
import torch.nn as nn

class FeedforwardNN(nn.Module):
    """Fully connected network mapping one feature row to a multi-step forecast.

    Every hidden layer is ``Linear -> ReLU -> Dropout``; the output layer is a
    plain ``Linear`` with ``output_dim`` units.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    hidden_dims : sequence of int, default (64, 32)
        Width of each hidden layer, in order. Any sequence (list or tuple) works.
    dropout : float, default 0.2
        Dropout probability applied after every hidden activation.
    output_dim : int, default 14
        Number of forecast steps produced (previously hard-coded to 14).
    """

    def __init__(self, input_dim, hidden_dims=(64, 32), dropout=0.2, output_dim=14):
        super().__init__()
        # Materialise as a list so tuples and lists are handled the same way.
        widths = [input_dim, *hidden_dims]

        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(widths[-1], output_dim))

        # Kept under the attribute name `model` so state_dict keys are unchanged.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the forecast for a batch of feature rows."""
        return self.model(x)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

def train_one_fold(X_train, y_train, X_val, y_val, epochs=10, lr=0.001, batch_size=32):
    """Fit a default FeedforwardNN on one fold and score it on the validation split.

    Features and targets are standardised with statistics from the training
    split. The network is trained for a fixed number of epochs with Adam and
    MSE loss (batches in original order, no shuffling), then evaluated once.

    Returns
    -------
    (mae, rmse, mape) computed on the original (unscaled) target values; MAPE
    is expressed in percent.
    """
    # --- scaling: fit on train, apply to validation ---
    scaler_X, scaler_y = StandardScaler(), StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # --- data pipeline ---
    train_loader = DataLoader(
        PriceForecastDataset(X_train_scaled, y_train_scaled),
        batch_size=batch_size,
        shuffle=False,
    )

    # --- model, optimiser, loss ---
    model = FeedforwardNN(input_dim=X_train.shape[1]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # --- fixed-length training ---
    model.train()
    for _ in range(epochs):
        for features, targets in train_loader:
            features, targets = features.to(device), targets.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()

    # --- prediction, mapped back to the original scale ---
    model.eval()
    with torch.no_grad():
        val_inputs = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
        scaled_forecast = model(val_inputs).cpu().numpy()
    preds = scaler_y.inverse_transform(scaled_forecast)
    actuals = scaler_y.inverse_transform(y_val_scaled)

    # --- metrics ---
    flat_actuals, flat_preds = actuals.flatten(), preds.flatten()
    mae = mean_absolute_error(flat_actuals, flat_preds)
    rmse = np.sqrt(mean_squared_error(flat_actuals, flat_preds))
    mape = np.mean(np.abs((actuals - preds) / np.abs(actuals))) * 100

    return mae, rmse, mape

In [None]:
# Per-fold metrics, collected so they can be averaged after the loop.
mae_scores = []
rmse_scores = []
mape_scores = []

# Train and evaluate one model per expanding-window fold.
for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14)
):
    print(f"Fold {fold+1}")
    mae, rmse, mape = train_one_fold(X_tr, y_tr, X_val, y_val)
    print(f"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
    mae_scores.append(mae)
    rmse_scores.append(rmse)
    mape_scores.append(mape)

# Unweighted mean of each metric across folds.
print("\n Final CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

In [None]:
# with early stopping
def train_one_fold(X_train, y_train, X_val, y_val,
                   epochs=50, lr=0.001, batch_size=32, patience=5):
    """Train a default FeedforwardNN on one fold with early stopping.

    Features and targets are standardised with training-split statistics. After
    every epoch the MSE on the validation split is computed; training stops once
    it has failed to improve by more than 1e-4 for `patience` consecutive
    epochs, and the weights of the best epoch are restored before scoring.

    NOTE(review): the split used for early stopping is the same one the metrics
    are reported on, so the returned scores are optimistically biased.

    Parameters
    ----------
    X_train, y_train, X_val, y_val : 2-D arrays (samples x features / steps).
    epochs : int       maximum number of epochs.
    lr : float         Adam learning rate (weight decay is fixed at 1e-4).
    batch_size : int   mini-batch size; batches keep the original row order.
    patience : int     epochs without improvement tolerated before stopping.

    Returns
    -------
    (mae, rmse, mape) on the original target scale; MAPE is in percent.
    """
    torch.manual_seed(123)  # same torch RNG state at the start of every call

    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    import numpy as np

    # Scale inputs and targets using training statistics only.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # Dataset & DataLoader
    train_dataset = PriceForecastDataset(X_train_scaled, y_train_scaled)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # The validation tensors never change, so build them once outside the loop.
    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val_scaled, dtype=torch.float32).to(device)

    # Model setup
    model = FeedforwardNN(input_dim=X_train.shape[1]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.MSELoss()

    # Early stopping bookkeeping
    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

        # Validation loss for this epoch
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

        # Early stopping check
        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameter tensors, which
            # later optimiser steps overwrite in place. Clone them so the snapshot
            # really is the best epoch rather than whatever the last epoch left.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Restore best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final prediction, mapped back to the original target scale
    model.eval()
    with torch.no_grad():
        preds_scaled = model(X_val_tensor).cpu().numpy()
        preds = scaler_y.inverse_transform(preds_scaled)
        actuals = scaler_y.inverse_transform(y_val_scaled)

    mae = mean_absolute_error(actuals.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(actuals.flatten(), preds.flatten()))
    # The 1e-8 offset avoids division by zero when an actual value is exactly 0.
    mape = np.mean(np.abs((actuals - preds) / (actuals + 1e-8))) * 100

    return mae, rmse, mape

In [None]:
# Per-fold metrics for the early-stopping variant of train_one_fold.
mae_scores = []
rmse_scores = []
mape_scores = []

# Train and evaluate one model per expanding-window fold.
for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14)
):
    print(f"Fold {fold+1}")
    mae, rmse, mape = train_one_fold(X_tr, y_tr, X_val, y_val)
    print(f"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
    mae_scores.append(mae)
    rmse_scores.append(rmse)
    mape_scores.append(mape)

# Unweighted mean of each metric across folds.
print("\n Final CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")



Hyperparameter tuning

In [None]:
def train_one_fold(X_train, y_train, X_val, y_val,
                   hidden_dims=[64, 32], dropout=0.2,
                   epochs=50, lr=0.001, batch_size=32, patience=5):
    """Train a configurable FeedforwardNN on one fold with early stopping.

    Same procedure as the early-stopping trainer, with the network architecture
    exposed for hyperparameter search. Features and targets are standardised
    with training-split statistics; training stops once the validation MSE has
    not improved by more than 1e-4 for `patience` consecutive epochs, and the
    best epoch's weights are restored before scoring.

    NOTE(review): early stopping and the reported metrics use the same
    validation split, so the scores are optimistically biased.

    Parameters
    ----------
    X_train, y_train, X_val, y_val : 2-D arrays (samples x features / steps).
    hidden_dims : list of int   hidden layer widths (only read, never mutated).
    dropout : float             dropout probability after each hidden layer.
    epochs : int                maximum number of epochs.
    lr : float                  Adam learning rate (weight decay fixed at 1e-4).
    batch_size : int            mini-batch size; row order is preserved.
    patience : int              epochs without improvement before stopping.

    Returns
    -------
    (mae, rmse, mape) on the original target scale; MAPE is in percent.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error

    # Scale input/output using training statistics only.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # PyTorch dataset and DataLoader
    train_dataset = PriceForecastDataset(X_train_scaled, y_train_scaled)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # The validation tensors never change, so build them once outside the loop.
    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val_scaled, dtype=torch.float32).to(device)

    # Model
    model = FeedforwardNN(input_dim=X_train.shape[1], hidden_dims=hidden_dims, dropout=dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.MSELoss()

    # Early stopping bookkeeping
    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameter tensors, which
            # later optimiser steps overwrite in place. Clone them so the snapshot
            # really is the best epoch rather than whatever the last epoch left.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # Stop silently; the grid search calls this many times.
                break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final prediction, mapped back to the original target scale
    model.eval()
    with torch.no_grad():
        preds_scaled = model(X_val_tensor).cpu().numpy()
        preds = scaler_y.inverse_transform(preds_scaled)
        actuals = scaler_y.inverse_transform(y_val_scaled)

    # Metrics
    mae = mean_absolute_error(actuals.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(actuals.flatten(), preds.flatten()))
    # The 1e-8 offset avoids division by zero when an actual value is exactly 0.
    mape = np.mean(np.abs((actuals - preds) / (actuals + 1e-8))) * 100

    return mae, rmse, mape

In [None]:
import itertools

# Candidate values for the feedforward grid search.
hidden_layer_options = [[64, 32], [32, 16], [128, 64, 32], [64]]  # hidden layer widths
dropout_options = [0.0, 0.2, 0.3]
lr_options = [0.001, 0.0005]

# Cartesian product: one (hidden_dims, dropout, lr) tuple per configuration.
param_grid = list(itertools.product(hidden_layer_options, dropout_options, lr_options))

In [None]:
# One summary dict per configuration (hyperparameters + fold-averaged metrics).
results = []

for i, (hidden_dims, dropout, lr) in enumerate(param_grid):
    print(f"\n Config {i+1}/{len(param_grid)}: hidden={hidden_dims}, dropout={dropout}, lr={lr}")

    # Metrics of the current configuration, one entry per fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    # Full expanding-window CV for this configuration.
    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            hidden_dims=hidden_dims,
            dropout=dropout,
            lr=lr,
            epochs=50,
            patience=5,
            batch_size=32
        )
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Unweighted mean across folds.
    avg_mae = np.mean(mae_scores)
    avg_rmse = np.mean(rmse_scores)
    avg_mape = np.mean(mape_scores)

    results.append({
        "hidden_dims": hidden_dims,
        "dropout": dropout,
        "lr": lr,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f}, RMSE: {avg_rmse:.4f}, MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Tabulate the grid-search results, best (lowest) RMSE first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by="RMSE").reset_index(drop=True)
df_results

**LSTM**

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import trange

# --- 1. LSTM Model ---
class LSTMForecastNet(nn.Module):
    """LSTM encoder followed by a linear head that emits a multi-step forecast.

    Parameters
    ----------
    input_size : int    number of features per time step.
    hidden_size : int   LSTM hidden state width.
    num_layers : int    number of stacked LSTM layers.
    dropout : float     dropout between stacked LSTM layers. nn.LSTM applies it
                        after every layer except the last, so it has no effect
                        when `num_layers` is 1.
    output_size : int   number of forecast steps produced.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=1, dropout=0.2, output_size=14):
        super().__init__()
        # With a single layer there is no inter-layer position for dropout and
        # PyTorch warns about a non-zero value; pass 0.0 in that case. The
        # computation is unchanged and no warning filter is needed.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                            batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a [batch, seq, features] tensor to a [batch, output_size] forecast."""
        lstm_out, _ = self.lstm(x)  # [batch, seq, hidden]
        # Only the hidden state of the last time step feeds the forecast head.
        out = self.fc(lstm_out[:, -1, :])  # [batch, output_size]
        return out

In [None]:
# --- 2. Dataset Class  ---
class LSTMForecastDataset(Dataset):
    """Dataset of (input window, target) pairs held as float32 tensors."""

    def __init__(self, X, y):
        # X is expected as [samples, seq_len, features]; y is aligned on axis 0.
        windows = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X = windows
        self.y = targets

    def __len__(self):
        # One sample per input window.
        return len(self.X)

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

In [None]:
# --- 3. Sliding Window Input Creation ---
def create_lstm_input(X_scaled, y_scaled, sequence_len=30):
    """Build sliding input windows and their aligned targets.

    For every row index ``i`` in ``[sequence_len, len(X_scaled))`` the input
    window is rows ``i - sequence_len .. i - 1`` of ``X_scaled`` and the target
    is ``y_scaled[i]``.

    NOTE(review): the window stops at row ``i - 1``, so the features of row
    ``i`` itself are not part of the input for target ``i`` — confirm this
    alignment is intended.

    Parameters
    ----------
    X_scaled : array-like, shape (n_rows, n_features)
    y_scaled : array-like with at least ``n_rows`` rows
    sequence_len : int, must be >= 1

    Returns
    -------
    X_lstm : ndarray, shape (n_windows, sequence_len, n_features)
    y_lstm : ndarray, shape (n_windows, ...)
        When there are not enough rows for a single window, both arrays are
        empty but keep their trailing dimensions (rather than collapsing to
        shape ``(0,)``), so callers can still read ``shape[1:]``.

    Raises
    ------
    ValueError
        If ``sequence_len`` is smaller than 1 or ``y_scaled`` has fewer rows
        than ``X_scaled``.
    """
    X_scaled = np.asarray(X_scaled)
    y_scaled = np.asarray(y_scaled)

    if sequence_len < 1:
        raise ValueError("sequence_len must be at least 1")
    n_rows = len(X_scaled)
    if len(y_scaled) < n_rows:
        raise ValueError("y_scaled must have at least as many rows as X_scaled")

    if n_rows <= sequence_len:
        # Not enough history for even one window.
        empty_X = np.empty((0, sequence_len) + X_scaled.shape[1:], dtype=X_scaled.dtype)
        empty_y = np.empty((0,) + y_scaled.shape[1:], dtype=y_scaled.dtype)
        return empty_X, empty_y

    X_lstm = np.stack([X_scaled[i - sequence_len:i] for i in range(sequence_len, n_rows)])
    # Targets for rows sequence_len .. n_rows-1; copied so the result does not
    # alias the caller's array.
    y_lstm = y_scaled[sequence_len:n_rows].copy()
    return X_lstm, y_lstm

In [None]:
# --- 4. Training One Fold ---
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np
import torch
from torch.utils.data import DataLoader

def train_one_fold(X_train, y_train, X_val, y_val,
                   sequence_len=30, num_epochs=50, patience=5,
                   batch_size=32, lr=0.001, hidden_size=64, num_layers=1, dropout=0.2):
    """Train an LSTMForecastNet on one CV fold with early stopping.

    Features and targets are standardised with training-split statistics, both
    splits are turned into sliding windows of `sequence_len` rows, and the
    network is trained with Adam / MSE. Training stops once the validation MSE
    has not improved by more than 1e-4 for `patience` consecutive epochs; the
    best epoch's weights are restored before scoring.

    NOTE(review): early stopping and the reported metrics use the same
    validation windows, so the scores are optimistically biased.

    Returns
    -------
    (mae, rmse, mape) on the original target scale (MAPE in percent), or
    (None, None, None) when either split is too short to produce a window.
    """
    # --- 1. Scaling (fit on train only) ---
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)  # shape: (n_samples, 14)
    y_val_scaled = scaler_y.transform(y_val)

    # --- 2. Create LSTM sequences ---
    X_train_seq, y_train_seq = create_lstm_input(X_train_scaled, y_train_scaled, sequence_len)
    X_val_seq, y_val_seq = create_lstm_input(X_val_scaled, y_val_scaled, sequence_len)

    if X_val_seq.shape[0] == 0:
        print("Skipping fold: Not enough validation sequences.")
        return None, None, None
    if X_train_seq.shape[0] == 0:
        # Without this guard the shape[2] lookup below fails with an IndexError.
        print("Skipping fold: Not enough training sequences.")
        return None, None, None

    # --- 3. Datasets and Loaders ---
    train_loader = DataLoader(
        LSTMForecastDataset(X_train_seq, y_train_seq), batch_size=batch_size, shuffle=False
    )
    val_loader = DataLoader(
        LSTMForecastDataset(X_val_seq, y_val_seq), batch_size=batch_size, shuffle=False
    )

    # --- 4. Model setup ---
    model = LSTMForecastNet(
        input_size=X_train_seq.shape[2],
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # --- 5. Training Loop with Early Stopping ---
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            preds = model(xb)  # shape: (batch_size, 14)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # Validation: a single pass collects predictions and targets together.
        model.eval()
        with torch.no_grad():
            batch_preds, batch_targets = [], []
            for xb, yb in val_loader:
                batch_preds.append(model(xb.to(device)))
                batch_targets.append(yb.to(device))
            val_loss = criterion(torch.cat(batch_preds), torch.cat(batch_targets)).item()

        # Early stopping
        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            patience_counter = 0
            # state_dict() returns references to the live parameter tensors, which
            # later optimiser steps overwrite in place. Clone them so the snapshot
            # really is the best epoch rather than whatever the last epoch left.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break  # no improvement for `patience` epochs

    # --- 6. Load best model ---
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # --- 7. Final Evaluation ---
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        preds_scaled = model(X_val_tensor).cpu().numpy()
        targets_scaled = y_val_seq  # already a NumPy array

    # Back to the original target scale
    preds = scaler_y.inverse_transform(preds_scaled)
    targets = scaler_y.inverse_transform(targets_scaled)

    # --- 8. Metrics ---
    mae = mean_absolute_error(targets.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(targets.flatten(), preds.flatten()))
    mape = mean_absolute_percentage_error(targets.flatten(), preds.flatten()) * 100

    return mae, rmse, mape

In [None]:
# --- 5. Expanding Window Cross Validation ---
def expanding_window_cv(X, y, initial_train_size, horizon, step, sequence_len, min_val_size=60):
    """Yield expanding-window splits whose validation block spans several rows.

    NOTE: this redefines (and, once the cell has run, replaces) the
    single-sample ``expanding_window_cv`` defined earlier in the notebook.

    Each fold trains on rows ``[0, start)`` and validates on rows
    ``[start, start + min_val_size)``; ``start`` begins at
    ``initial_train_size`` and advances by ``step`` for as long as a complete
    validation block still fits.

    Parameters
    ----------
    X, y : sliceable sequences of equal length.
    initial_train_size : int   first validation start index.
    horizon : int              not used by the split; kept for call compatibility.
    step : int                 distance between validation starts; must be positive.
    sequence_len : int         not used by the split; kept for call compatibility.
    min_val_size : int, default 60
        Number of rows in every validation block. It must exceed the window
        length used downstream for the fold to produce at least one validation
        sequence (60 - 30 = 30 sequences with the notebook's settings).

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    n_rows = len(X)
    # The range bound guarantees start + min_val_size <= n_rows for every fold.
    for start in range(initial_train_size, n_rows - min_val_size + 1, step):
        end_val = start + min_val_size
        yield X[:start], y[:start], X[start:end_val], y[start:end_val]

In [None]:
# --- 6. Main CV Execution ---
import warnings
# Silence two known-noisy messages: the single-layer RNN dropout warning and the
# tensor copy-construct warning.
warnings.filterwarnings("ignore", message="dropout option adds dropout after all but last recurrent layer")
warnings.filterwarnings("ignore", message="To copy construct from a tensor")

sequence_len = 30
horizon = 14
initial_train_size = 1095  # 3 years

# Per-fold metrics, collected so they can be averaged after the loop.
mae_scores = []
rmse_scores = []
mape_scores = []

# step=horizon: consecutive folds start `horizon` rows apart.
for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(X, y, initial_train_size=initial_train_size, horizon=horizon, step=horizon, sequence_len=sequence_len)
):
    print(f"\nFold {fold+1}")
    mae, rmse, mape = train_one_fold(
        X_tr, y_tr, X_val, y_val, sequence_len=sequence_len
    )
    if mae is None:
        continue  # Skip if fold was invalid
    print(f"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
    mae_scores.append(mae)
    rmse_scores.append(rmse)
    mape_scores.append(mape)

# --- 7. Final Results ---
# NOTE(review): if every fold was skipped the lists are empty and np.mean returns nan.
print("\n Final CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

Hyperparameter tuning 1

In [None]:
import itertools
import pandas as pd

# Hyperparameter grid, run 1: lr = 0.001, batch size = 32.
hidden_sizes = [32, 64, 128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001] # substitute with 0.0005 for the alternative run
batch_sizes = [32] # substitute with 64 for the alternative run
sequence_lens = [30]

# Cartesian product; each tuple is
# (hidden_size, num_layers, dropout, lr, batch_size, sequence_len).
param_grid = list(itertools.product(
    hidden_sizes,
    num_layers_list,
    dropouts,
    learning_rates,
    batch_sizes,
    sequence_lens
))

print(f"Total combinations: {len(param_grid)}")

In [None]:
# One summary dict per configuration (hyperparameters + fold-averaged metrics).
results = []

for i, (hidden_size, num_layers, dropout, lr, batch_size, sequence_len) in enumerate(param_grid):
    print(f"\n Config {i+1}/{len(param_grid)}: hidden={hidden_size}, layers={num_layers}, dropout={dropout}, "
          f"lr={lr}, batch_size={batch_size}, seq_len={sequence_len}")

    # Metrics of the current configuration, one entry per usable fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    # Full expanding-window CV for this configuration.
    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y,
                            initial_train_size=1095,
                            horizon=14,
                            step=14,
                            sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout
        )

        # train_one_fold returns None metrics when the fold had no validation sequences.
        if mae is None:
            continue  # skip invalid fold

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    if mae_scores:  # only record results if at least one fold succeeded
        results.append({
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "dropout": dropout,
            "lr": lr,
            "batch_size": batch_size,
            "sequence_len": sequence_len,
            "MAE": np.mean(mae_scores),
            "RMSE": np.mean(rmse_scores),
            "MAPE": np.mean(mape_scores)
        })

In [None]:
# Tabulate run 1, best (lowest) RMSE first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("RMSE").reset_index(drop=True)
df_results

Hyperparameter tuning 2

In [None]:
import itertools
import pandas as pd

# Hyperparameter grid, run 2: lr = 0.0005, batch size = 32.
hidden_sizes = [32, 64, 128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.0005]
batch_sizes = [32] # substitute with 64 for the alternative run
sequence_lens = [30]

# Cartesian product; each tuple is
# (hidden_size, num_layers, dropout, lr, batch_size, sequence_len).
param_grid = list(itertools.product(
    hidden_sizes,
    num_layers_list,
    dropouts,
    learning_rates,
    batch_sizes,
    sequence_lens
))

print(f"Total combinations: {len(param_grid)}")

In [None]:
# One summary dict per configuration (hyperparameters + fold-averaged metrics).
results = []

for i, (hidden_size, num_layers, dropout, lr, batch_size, sequence_len) in enumerate(param_grid):
    print(f"\n Config {i+1}/{len(param_grid)}: hidden={hidden_size}, layers={num_layers}, dropout={dropout}, "
          f"lr={lr}, batch_size={batch_size}, seq_len={sequence_len}")

    # Metrics of the current configuration, one entry per usable fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    # Full expanding-window CV for this configuration.
    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y,
                            initial_train_size=1095,
                            horizon=14,
                            step=14,
                            sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout
        )

        # train_one_fold returns None metrics when the fold had no validation sequences.
        if mae is None:
            continue  # skip invalid fold

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    if mae_scores:  # only record results if at least one fold succeeded
        results.append({
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "dropout": dropout,
            "lr": lr,
            "batch_size": batch_size,
            "sequence_len": sequence_len,
            "MAE": np.mean(mae_scores),
            "RMSE": np.mean(rmse_scores),
            "MAPE": np.mean(mape_scores)
        })

In [None]:
# Tabulate run 2, best (lowest) RMSE first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("RMSE").reset_index(drop=True)
df_results

Hyperparameter tuning 3

In [None]:
import itertools
import pandas as pd

# Hyperparameter grid, run 3: lr = 0.001, batch size = 64.
hidden_sizes = [32, 64, 128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001]
batch_sizes = [64]
sequence_lens = [30]

# Cartesian product; each tuple is
# (hidden_size, num_layers, dropout, lr, batch_size, sequence_len).
param_grid = list(itertools.product(
    hidden_sizes,
    num_layers_list,
    dropouts,
    learning_rates,
    batch_sizes,
    sequence_lens
))

print(f"Total combinations: {len(param_grid)}")

In [None]:
# One summary dict per configuration (hyperparameters + fold-averaged metrics).
results = []

for i, (hidden_size, num_layers, dropout, lr, batch_size, sequence_len) in enumerate(param_grid):
    print(f"\n Config {i+1}/{len(param_grid)}: hidden={hidden_size}, layers={num_layers}, dropout={dropout}, "
          f"lr={lr}, batch_size={batch_size}, seq_len={sequence_len}")

    # Metrics of the current configuration, one entry per usable fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    # Full expanding-window CV for this configuration.
    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y,
                            initial_train_size=1095,
                            horizon=14,
                            step=14,
                            sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout
        )

        # train_one_fold returns None metrics when the fold had no validation sequences.
        if mae is None:
            continue  # skip invalid fold

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    if mae_scores:  # only record results if at least one fold succeeded
        results.append({
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "dropout": dropout,
            "lr": lr,
            "batch_size": batch_size,
            "sequence_len": sequence_len,
            "MAE": np.mean(mae_scores),
            "RMSE": np.mean(rmse_scores),
            "MAPE": np.mean(mape_scores)
        })

In [None]:
# Tabulate run 3, best (lowest) RMSE first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("RMSE").reset_index(drop=True)
df_results

Hyperparameter tuning 4

In [None]:
import itertools
import pandas as pd

# Hyperparameter grid, run 4.
# NOTE(review): these values (lr = 0.001, batch size = 64) are identical to the
# grid of the "tuning 3" section. If this run was meant to cover the remaining
# combination (lr = 0.0005 with batch size = 64), `learning_rates` needs
# changing — confirm.
hidden_sizes = [32, 64, 128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001]
batch_sizes = [64]
sequence_lens = [30]

# Cartesian product; each tuple is
# (hidden_size, num_layers, dropout, lr, batch_size, sequence_len).
param_grid = list(itertools.product(
    hidden_sizes,
    num_layers_list,
    dropouts,
    learning_rates,
    batch_sizes,
    sequence_lens
))

print(f"Total combinations: {len(param_grid)}")

In [None]:
# One summary dict per configuration (hyperparameters + fold-averaged metrics).
results = []

for i, (hidden_size, num_layers, dropout, lr, batch_size, sequence_len) in enumerate(param_grid):
    print(f"\n Config {i+1}/{len(param_grid)}: hidden={hidden_size}, layers={num_layers}, dropout={dropout}, "
          f"lr={lr}, batch_size={batch_size}, seq_len={sequence_len}")

    # Metrics of the current configuration, one entry per usable fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    # Full expanding-window CV for this configuration.
    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y,
                            initial_train_size=1095,
                            horizon=14,
                            step=14,
                            sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout
        )

        # train_one_fold returns None metrics when the fold had no validation sequences.
        if mae is None:
            continue  # skip invalid fold

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    if mae_scores:  # only record results if at least one fold succeeded
        results.append({
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "dropout": dropout,
            "lr": lr,
            "batch_size": batch_size,
            "sequence_len": sequence_len,
            "MAE": np.mean(mae_scores),
            "RMSE": np.mean(rmse_scores),
            "MAPE": np.mean(mape_scores)
        })

In [None]:
# Tabulate run 4, best (lowest) RMSE first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("RMSE").reset_index(drop=True)
df_results

More restricted grid search

In [None]:
import itertools
import pandas as pd
import numpy as np

# Restricted grid: a narrower search around the better-performing settings.
hidden_sizes = [64, 128]
num_layers_list = [1, 2]
lrs = [0.001, 0.0005]
dropouts = [0.1, 0.3]
sequence_lens = [30]

# Fixed parameters shared by every configuration
batch_size = 64
num_epochs = 50
patience = 5
initial_train_size = 1095
horizon = 14
step = 14

# All combinations; each tuple is (hidden_size, num_layers, lr, dropout, sequence_len).
param_grid = list(itertools.product(hidden_sizes, num_layers_list, lrs, dropouts, sequence_lens))

results = []

# Grid search loop
for i, (hidden_size, num_layers, lr, dropout, sequence_len) in enumerate(param_grid):
    # The separator below is an em dash; it was previously stored as the
    # mis-decoded byte sequence "â€”".
    print(f"\n Config {i+1}/{len(param_grid)} — hidden: {hidden_size}, layers: {num_layers}, "
          f"lr: {lr}, dropout: {dropout}, seq_len: {sequence_len}")

    # Metrics of the current configuration, one entry per usable fold.
    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=initial_train_size, horizon=horizon, step=step, sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            lr=lr,
            batch_size=batch_size,
            num_epochs=num_epochs,
            patience=patience
        )
        # train_one_fold returns None metrics for folds it had to skip.
        if mae is not None:
            mae_scores.append(mae)
            rmse_scores.append(rmse)
            mape_scores.append(mape)

    # Aggregate fold results. Skip configurations where every fold was skipped:
    # np.mean([]) would otherwise put NaN rows (and a RuntimeWarning) in the table.
    if mae_scores:
        results.append({
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "lr": lr,
            "dropout": dropout,
            "sequence_len": sequence_len,
            "MAE": np.mean(mae_scores),
            "RMSE": np.mean(rmse_scores),
            "MAPE": np.mean(mape_scores)
        })

# Final DataFrame, best (lowest) RMSE first
df_lstm_results = pd.DataFrame(results)
df_lstm_results = df_lstm_results.sort_values("RMSE").reset_index(drop=True)
df_lstm_results

GRU

In [None]:
class GRUForecastNet(nn.Module):
    """GRU encoder followed by a linear head that emits the whole forecast at once.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability applied between stacked GRU layers.
        output_size: Number of values produced per sample.
    """
    def __init__(self, input_size, hidden_size=64, num_layers=1, dropout=0.2, output_size=14):
        super().__init__()
        # nn.GRU applies dropout only between stacked layers; with a single layer a
        # non-zero value has no effect and merely triggers a UserWarning, so pass 0.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers,
                          batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map [batch, seq, input_size] to [batch, output_size] from the last time step."""
        gru_out, _ = self.gru(x)  # [batch, seq, hidden]
        out = self.fc(gru_out[:, -1, :])  # [batch, output_size]
        return out

In [None]:
def train_one_gru_fold(X_train, y_train, X_val, y_val,
                       sequence_len=30, num_epochs=50, patience=5,
                       batch_size=32, lr=0.001, hidden_size=64, num_layers=1, dropout=0.2):
    """Train a GRUForecastNet on one CV fold and score it on that fold's validation data.

    Both scalers are fitted on the training split only. Training uses Adam with an
    MSE loss on the scaled targets and stops early once the validation loss has
    failed to improve by more than 1e-4 for `patience` consecutive epochs.

    Args:
        X_train, X_val: 2-D feature arrays for the training / validation split.
        y_train, y_val: 2-D target arrays, one column per forecast step.
        sequence_len: Window length passed to `create_lstm_input`.
        num_epochs: Maximum number of training epochs.
        patience: Early-stopping patience, in epochs.
        batch_size: Mini-batch size used by both data loaders.
        lr: Adam learning rate.
        hidden_size, num_layers, dropout: Forwarded to `GRUForecastNet`.

    Returns:
        (mae, rmse, mape) in the original target units, with MAPE in percent, or
        (None, None, None) when no validation sequence could be built.
    """

    # --- 1. Scaling (statistics come from the training split only) ---
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)  # shape: (n_samples, 14)
    y_val_scaled = scaler_y.transform(y_val)

    # --- 2. Create sequences ---
    X_train_seq, y_train_seq = create_lstm_input(X_train_scaled, y_train_scaled, sequence_len)
    X_val_seq, y_val_seq = create_lstm_input(X_val_scaled, y_val_scaled, sequence_len)

    if X_val_seq.shape[0] == 0:
        print("Skipping fold: Not enough validation sequences.")
        return None, None, None

    # --- 3. Data loaders (no shuffling: samples stay in their original order) ---
    train_loader = DataLoader(
        LSTMForecastDataset(X_train_seq, y_train_seq), batch_size=batch_size, shuffle=False
    )
    val_loader = DataLoader(
        LSTMForecastDataset(X_val_seq, y_val_seq), batch_size=batch_size, shuffle=False
    )

    # --- 4. Model setup ---
    model = GRUForecastNet(
        input_size=X_train_seq.shape[2],
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    # --- 5. Training loop ---
    # NOTE(review): the same validation split drives early stopping and the final
    # metrics, so the reported scores may be optimistic.
    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_preds = torch.cat([model(xb.to(device)) for xb, _ in val_loader])
            val_targets = torch.cat([yb.to(device) for _, yb in val_loader])
            val_loss = criterion(val_preds, val_targets)

        if val_loss.item() < best_val_loss - 1e-4:
            best_val_loss = val_loss.item()
            patience_counter = 0
            # state_dict() hands back references to the live parameters, which later
            # optimizer steps overwrite in place; clone them to keep a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    # --- 6. Load best model (absent only if the validation loss never improved) ---
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # --- 7. Final evaluation ---
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        preds_scaled = model(X_val_tensor).cpu().numpy()
        targets_scaled = y_val_seq

    # Undo the target scaling so the metrics are in the original units.
    preds = scaler_y.inverse_transform(preds_scaled)
    targets = scaler_y.inverse_transform(targets_scaled)

    # --- 8. Metrics (all forecast steps pooled together) ---
    mae = mean_absolute_error(targets.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(targets.flatten(), preds.flatten()))
    mape = mean_absolute_percentage_error(targets.flatten(), preds.flatten()) * 100

    return mae, rmse, mape

In [None]:
# Baseline GRU: expanding-window CV with the default hyperparameters.
sequence_len = 30

mae_scores, rmse_scores, mape_scores = [], [], []

for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(
        X, y,
        initial_train_size=1095,
        horizon=14,
        step=14,
        sequence_len=sequence_len,
    )
):
    print(f"\nFold {fold+1}")
    mae, rmse, mape = train_one_gru_fold(X_tr, y_tr, X_val, y_val, sequence_len=sequence_len)

    # A None result marks a fold that could not be evaluated; leave it out.
    if mae is not None:
        print(f"  MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.2f}")
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

# --- Final CV Results: unweighted mean over the evaluated folds ---
print("\n Final CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

Hyperparameter tuning

In [None]:
import itertools

# Candidate values for each GRU hyperparameter.
hidden_sizes = [32, 64, 128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001, 0.0005]
batch_sizes = [32, 64]

# Every combination, as (hidden_size, num_layers, dropout, lr, batch_size) tuples.
gru_param_grid = [
    *itertools.product(
        hidden_sizes,
        num_layers_list,
        dropouts,
        learning_rates,
        batch_sizes,
    )
]
print(f"Total GRU configurations: {len(gru_param_grid)}")

In [None]:
gru_results = []

# Grid search: one expanding-window CV run per entry of `gru_param_grid`.
for i, (hidden_size, num_layers, dropout, lr, batch_size) in enumerate(gru_param_grid):
    print(f"\n Config {i+1}/{len(gru_param_grid)} → hidden={hidden_size}, layers={num_layers}, dropout={dropout}, lr={lr}, batch_size={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_gru_fold(
            X_tr, y_tr, X_val, y_val,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            lr=lr,
            batch_size=batch_size,
            num_epochs=50,
            patience=5,
            sequence_len=30
        )

        # train_one_gru_fold returns (None, None, None) for folds it cannot evaluate.
        if mae is None:
            continue
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average over folds. np.mean([]) would emit a RuntimeWarning and return NaN,
    # so the "no usable fold" case is handled explicitly and reported instead.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("No valid folds for this config; recording NaN metrics.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    gru_results.append({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Rank the GRU configurations from lowest to highest cross-validated RMSE.
df_gru_results = (
    pd.DataFrame(gru_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_gru_results.head()

More restricted hyperparameter search (hidden size 32)

In [None]:
import itertools

# Candidate values for the GRU search with the hidden size pinned to 32.
hidden_sizes = [32]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001, 0.0005]
batch_sizes = [32]

# Every combination, as (hidden_size, num_layers, dropout, lr, batch_size) tuples.
gru_param_grid = [
    *itertools.product(
        hidden_sizes,
        num_layers_list,
        dropouts,
        learning_rates,
        batch_sizes,
    )
]
print(f"Total GRU configurations: {len(gru_param_grid)}")

In [None]:
gru_results = []

# Grid search: one expanding-window CV run per entry of `gru_param_grid`.
for i, (hidden_size, num_layers, dropout, lr, batch_size) in enumerate(gru_param_grid):
    print(f"\n Config {i+1}/{len(gru_param_grid)} → hidden={hidden_size}, layers={num_layers}, dropout={dropout}, lr={lr}, batch_size={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_gru_fold(
            X_tr, y_tr, X_val, y_val,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            lr=lr,
            batch_size=batch_size,
            num_epochs=50,
            patience=5,
            sequence_len=30
        )

        # train_one_gru_fold returns (None, None, None) for folds it cannot evaluate.
        if mae is None:
            continue
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average over folds. np.mean([]) would emit a RuntimeWarning and return NaN,
    # so the "no usable fold" case is handled explicitly and reported instead.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("No valid folds for this config; recording NaN metrics.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    gru_results.append({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Rank the GRU configurations from lowest to highest cross-validated RMSE.
df_gru_results = (
    pd.DataFrame(gru_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_gru_results

Hidden size = 64

In [None]:
import itertools

# Candidate values for the GRU search with the hidden size pinned to 64.
hidden_sizes = [64]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001, 0.0005]
batch_sizes = [32]

# Every combination, as (hidden_size, num_layers, dropout, lr, batch_size) tuples.
gru_param_grid = [
    *itertools.product(
        hidden_sizes,
        num_layers_list,
        dropouts,
        learning_rates,
        batch_sizes,
    )
]
print(f"Total GRU configurations: {len(gru_param_grid)}")

In [None]:
gru_results = []

# Grid search: one expanding-window CV run per entry of `gru_param_grid`.
for i, (hidden_size, num_layers, dropout, lr, batch_size) in enumerate(gru_param_grid):
    print(f"\n Config {i+1}/{len(gru_param_grid)} → hidden={hidden_size}, layers={num_layers}, dropout={dropout}, lr={lr}, batch_size={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_gru_fold(
            X_tr, y_tr, X_val, y_val,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            lr=lr,
            batch_size=batch_size,
            num_epochs=50,
            patience=5,
            sequence_len=30
        )

        # train_one_gru_fold returns (None, None, None) for folds it cannot evaluate.
        if mae is None:
            continue
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average over folds. np.mean([]) would emit a RuntimeWarning and return NaN,
    # so the "no usable fold" case is handled explicitly and reported instead.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("No valid folds for this config; recording NaN metrics.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    gru_results.append({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Rank the GRU configurations from lowest to highest cross-validated RMSE.
df_gru_results = (
    pd.DataFrame(gru_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_gru_results

Hidden size = 128

In [None]:
import itertools

# Candidate values for the GRU search with the hidden size pinned to 128.
hidden_sizes = [128]
num_layers_list = [1, 2, 3]
dropouts = [0.0, 0.1, 0.2, 0.3]
learning_rates = [0.001, 0.0005]
batch_sizes = [32]

# Every combination, as (hidden_size, num_layers, dropout, lr, batch_size) tuples.
gru_param_grid = [
    *itertools.product(
        hidden_sizes,
        num_layers_list,
        dropouts,
        learning_rates,
        batch_sizes,
    )
]
print(f"Total GRU configurations: {len(gru_param_grid)}")

In [None]:
gru_results = []

# Grid search: one expanding-window CV run per entry of `gru_param_grid`.
for i, (hidden_size, num_layers, dropout, lr, batch_size) in enumerate(gru_param_grid):
    print(f"\n Config {i+1}/{len(gru_param_grid)} → hidden={hidden_size}, layers={num_layers}, dropout={dropout}, lr={lr}, batch_size={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_gru_fold(
            X_tr, y_tr, X_val, y_val,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            lr=lr,
            batch_size=batch_size,
            num_epochs=50,
            patience=5,
            sequence_len=30
        )

        # train_one_gru_fold returns (None, None, None) for folds it cannot evaluate.
        if mae is None:
            continue
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average over folds. np.mean([]) would emit a RuntimeWarning and return NaN,
    # so the "no usable fold" case is handled explicitly and reported instead.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("No valid folds for this config; recording NaN metrics.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    gru_results.append({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Rank the GRU configurations from lowest to highest cross-validated RMSE.
df_gru_results = (
    pd.DataFrame(gru_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_gru_results

1D CNN

In [None]:
import torch
import torch.nn as nn

class CNN1DForecastNet(nn.Module):
    """Single-convolution forecaster: Conv1d -> ReLU -> Dropout -> flatten -> Linear.

    Args:
        input_channels: Number of features per time step.
        seq_len: Length of the input window; fixes the width of the linear head.
        num_outputs: Number of values produced per sample.
        num_filters: Number of convolution output channels.
        kernel_size: Width of the (unpadded, stride-1) convolution.
        dropout: Dropout probability applied after the activation.
    """

    def __init__(self, input_channels, seq_len, num_outputs=14, num_filters=64, kernel_size=3, dropout=0.2):
        super().__init__()

        self.conv1 = nn.Conv1d(input_channels, num_filters, kernel_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        # Without padding the convolution shortens the window by kernel_size - 1 steps.
        steps_after_conv = seq_len - (kernel_size - 1)

        self.flatten_dim = steps_after_conv * num_filters
        self.fc = nn.Linear(self.flatten_dim, num_outputs)

    def forward(self, x):
        """Map [batch_size, seq_len, input_channels] to [batch_size, num_outputs]."""
        channels_first = x.transpose(1, 2)  # [batch_size, input_channels, seq_len]
        features = self.dropout(self.relu(self.conv1(channels_first)))
        flat = features.view(features.size(0), -1)  # one row per sample
        return self.fc(flat)

In [None]:
def train_one_cnn_fold(X_train, y_train, X_val, y_val,
                       sequence_len=30,
                       num_outputs=14,
                       num_filters=64,
                       kernel_size=3,
                       dropout=0.2,
                       num_epochs=50,
                       patience=5,
                       batch_size=32,
                       lr=0.001):
    """Train a CNN1DForecastNet on one CV fold and score it on that fold's validation data.

    Both scalers are fitted on the training split only. Training uses Adam with an
    MSE loss on the scaled targets and stops early once the validation loss has
    failed to improve by more than 1e-4 for `patience` consecutive epochs.

    NOTE: a later cell of this notebook defines another `train_one_cnn_fold` (for the
    two-layer CNN) that shadows this one once it has been executed.

    Args:
        X_train, X_val: 2-D feature arrays for the training / validation split.
        y_train, y_val: 2-D target arrays, one column per forecast step.
        sequence_len: Window length passed to `create_lstm_input` and the model.
        num_outputs, num_filters, kernel_size, dropout: Forwarded to `CNN1DForecastNet`.
        num_epochs: Maximum number of training epochs.
        patience: Early-stopping patience, in epochs.
        batch_size: Mini-batch size used by both data loaders.
        lr: Adam learning rate.

    Returns:
        (mae, rmse, mape) in the original target units, with MAPE in percent, or
        (None, None, None) when no validation sequence could be built.
    """

    # --- Scaling (statistics come from the training split only) ---
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # --- Sequence creation ---
    X_train_seq, y_train_seq = create_lstm_input(X_train_scaled, y_train_scaled, sequence_len)
    X_val_seq, y_val_seq = create_lstm_input(X_val_scaled, y_val_scaled, sequence_len)

    if X_val_seq.shape[0] == 0:
        print("Skipping fold: Not enough validation sequences.")
        return None, None, None

    # --- DataLoader (no shuffling: samples stay in their original order) ---
    train_loader = DataLoader(LSTMForecastDataset(X_train_seq, y_train_seq), batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(LSTMForecastDataset(X_val_seq, y_val_seq), batch_size=batch_size, shuffle=False)

    # --- Model ---
    model = CNN1DForecastNet(
        input_channels=X_train_seq.shape[2],
        seq_len=sequence_len,
        num_outputs=num_outputs,
        num_filters=num_filters,
        kernel_size=kernel_size,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    # --- Training ---
    # NOTE(review): the same validation split drives early stopping and the final
    # metrics, so the reported scores may be optimistic.
    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # --- Validation ---
        model.eval()
        with torch.no_grad():
            val_preds = torch.cat([model(xb.to(device)) for xb, _ in val_loader])
            val_targets = torch.cat([yb.to(device) for _, yb in val_loader])
            val_loss = criterion(val_preds, val_targets)

        if val_loss.item() < best_val_loss - 1e-4:
            best_val_loss = val_loss.item()
            # state_dict() hands back references to the live parameters, which later
            # optimizer steps overwrite in place; clone them to keep a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    # --- Load best model ---
    # No snapshot exists if the validation loss never improved (e.g. it was NaN);
    # keep the current weights in that case instead of passing None to load_state_dict.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # --- Final Evaluation ---
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        preds_scaled = model(X_val_tensor).cpu().numpy()

    # Undo the target scaling so the metrics are in the original units.
    preds = scaler_y.inverse_transform(preds_scaled)
    targets = scaler_y.inverse_transform(y_val_seq)

    # All forecast steps are pooled together for the metrics.
    mae = mean_absolute_error(targets.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(targets.flatten(), preds.flatten()))
    mape = mean_absolute_percentage_error(targets.flatten(), preds.flatten()) * 100

    return mae, rmse, mape

In [None]:
# Baseline one-layer CNN: expanding-window CV with a fixed configuration.
sequence_len = 30
horizon = 14
initial_train_size = 1095

mae_scores, rmse_scores, mape_scores = [], [], []

for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(
        X, y,
        initial_train_size=initial_train_size,
        horizon=horizon,
        step=horizon,
        sequence_len=sequence_len,
    )
):
    print(f"\nFold {fold+1}")
    mae, rmse, mape = train_one_cnn_fold(
        X_tr, y_tr, X_val, y_val,
        sequence_len=sequence_len,
        num_epochs=50, patience=5,
        batch_size=32, lr=0.001,
        num_filters=64, kernel_size=3, dropout=0.2,
    )

    # A None result marks a fold that could not be evaluated; leave it out.
    if mae is not None:
        print(f"  MAE: {mae:.4f} | RMSE: {rmse:.4f} | MAPE: {mape:.4f}")
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

# --- Final CV results: unweighted mean over the evaluated folds ---
print("\n Final CNN CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

Initial 1-layer hyperparameter tuning

In [None]:
import itertools

# Candidate values for each one-layer CNN hyperparameter.
num_filters_list = [32, 64, 128]
kernel_sizes = [2, 3, 5]
dropouts = [0.0, 0.1, 0.2]
learning_rates = [0.001]
batch_sizes = [32]

# Every combination, as (num_filters, kernel_size, dropout, lr, batch_size) tuples.
cnn_grid = [
    *itertools.product(
        num_filters_list,
        kernel_sizes,
        dropouts,
        learning_rates,
        batch_sizes,
    )
]
print(f"Total CNN configurations: {len(cnn_grid)}")

In [None]:
cnn_results = []

# Grid search: one expanding-window CV run per entry of `cnn_grid`.
for i, (num_filters, kernel_size, dropout, lr, batch_size) in enumerate(cnn_grid):
    print(f"\n Config {i+1}/{len(cnn_grid)} → filters={num_filters}, kernel={kernel_size}, dropout={dropout}, lr={lr}, batch={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_cnn_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=30,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            num_filters=num_filters,
            kernel_size=kernel_size,
            dropout=dropout
        )

        # Folds that report mae=None are left out of the averages.
        if mae is not None:
            mae_scores.append(mae)
            rmse_scores.append(rmse)
            mape_scores.append(mape)

    if mae_scores:
        # Compute each average once and reuse it for both the record and the log line.
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)

        cnn_results.append({
            "filters": num_filters,
            "kernel": kernel_size,
            "dropout": dropout,
            "lr": lr,
            "batch": batch_size,
            "MAE": avg_mae,
            "RMSE": avg_rmse,
            "MAPE": avg_mape
        })

        print(f"Avg MAE: {avg_mae:.2f} | RMSE: {avg_rmse:.2f} | MAPE: {avg_mape:.2f}")
    else:
        # No fold produced metrics, so this configuration is not recorded.
        print("Skipped config due to missing fold results")

In [None]:
# Rank the one-layer CNN configurations from lowest to highest cross-validated RMSE.
df_cnn_results = (
    pd.DataFrame(cnn_results)
    .sort_values("RMSE")
    .reset_index(drop=True)
)
df_cnn_results

2-layer CNN

In [None]:
import torch
import torch.nn as nn

class TwoLayerCNNForecastNet(nn.Module):
    """Two-convolution forecaster with global average pooling before the linear head.

    Pipeline: Conv1d -> ReLU -> MaxPool1d(2) -> Conv1d -> ReLU ->
    AdaptiveAvgPool1d(1) -> Dropout -> Linear.

    Args:
        input_channels: Number of features per time step.
        seq_len: Accepted for signature parity; not used by this class, because the
            global pooling makes the head independent of the sequence length.
        num_filters1, kernel_size1: Output channels / width of the first convolution.
        num_filters2, kernel_size2: Output channels / width of the second convolution.
        dropout: Dropout probability applied to the pooled features.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_channels, seq_len,
                 num_filters1=64, kernel_size1=3,
                 num_filters2=32, kernel_size2=2,
                 dropout=0.0, output_size=14):
        super().__init__()

        # Stage 1: convolution, activation, then halve the temporal resolution.
        self.conv1 = nn.Conv1d(input_channels, num_filters1, kernel_size1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        # Stage 2: convolution, activation, then collapse time to one step (global pooling).
        self.conv2 = nn.Conv1d(num_filters1, num_filters2, kernel_size2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AdaptiveAvgPool1d(1)

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters2, output_size)

    def forward(self, x):
        """Map [batch, seq_len, channels] to [batch, output_size]."""
        hidden = x.transpose(1, 2)  # [batch, channels, seq_len]

        hidden = self.conv1(hidden)
        hidden = self.relu1(hidden)
        hidden = self.pool1(hidden)

        hidden = self.conv2(hidden)
        hidden = self.relu2(hidden)
        hidden = self.pool2(hidden)  # [batch, channels, 1]

        pooled = self.dropout(hidden.squeeze(2))  # [batch, channels]
        return self.fc(pooled)  # [batch, output_size]

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import torch
from torch.utils.data import DataLoader, Dataset

def train_one_cnn_fold(X_train, y_train, X_val, y_val,
                       sequence_len=30, num_epochs=50, patience=5,
                       batch_size=32, lr=0.001,
                       num_filters1=64, kernel_size1=3,
                       num_filters2=32, kernel_size2=2,
                       dropout=0.1):
    """Train a TwoLayerCNNForecastNet on one CV fold and score it on the validation data.

    Both scalers are fitted on the training split only. Training uses Adam with an
    MSE loss on the scaled targets and stops early once the validation loss has
    failed to improve by more than 1e-4 for `patience` consecutive epochs.

    NOTE: this reuses the name of the one-layer CNN trainer defined in an earlier
    cell and therefore shadows it once this cell has been executed.

    Args:
        X_train, X_val: 2-D feature arrays for the training / validation split.
        y_train, y_val: 2-D target arrays, one column per forecast step.
        sequence_len: Window length passed to `create_lstm_input` and the model.
        num_epochs: Maximum number of training epochs.
        patience: Early-stopping patience, in epochs.
        batch_size: Mini-batch size used by both data loaders.
        lr: Adam learning rate.
        num_filters1, kernel_size1, num_filters2, kernel_size2, dropout:
            Forwarded to `TwoLayerCNNForecastNet`.

    Returns:
        (mae, rmse, mape) in the original target units, with MAPE in percent, or
        (None, None, None) when no validation sequence could be built.
    """

    # 1. Scaling (statistics come from the training split only)
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # 2. Create CNN sequences
    X_train_seq, y_train_seq = create_lstm_input(X_train_scaled, y_train_scaled, sequence_len)
    X_val_seq, y_val_seq = create_lstm_input(X_val_scaled, y_val_scaled, sequence_len)

    if X_val_seq.shape[0] == 0:
        print("Skipping fold: Not enough validation sequences.")
        return None, None, None

    # 3. Dataset & Dataloader (no shuffling: samples stay in their original order)
    train_loader = DataLoader(LSTMForecastDataset(X_train_seq, y_train_seq), batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(LSTMForecastDataset(X_val_seq, y_val_seq), batch_size=batch_size, shuffle=False)

    # 4. Model setup (the output width follows the number of target columns)
    model = TwoLayerCNNForecastNet(
        input_channels=X_train_seq.shape[2],
        seq_len=sequence_len,
        num_filters1=num_filters1,
        kernel_size1=kernel_size1,
        num_filters2=num_filters2,
        kernel_size2=kernel_size2,
        dropout=dropout,
        output_size=y_train_seq.shape[1]
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    # 5. Training loop
    # NOTE(review): the same validation split drives early stopping and the final
    # metrics, so the reported scores may be optimistic.
    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_preds = torch.cat([model(xb.to(device)) for xb, _ in val_loader])
            val_targets = torch.cat([yb.to(device) for _, yb in val_loader])
            val_loss = criterion(val_preds, val_targets)

        if val_loss.item() < best_val_loss - 1e-4:
            best_val_loss = val_loss.item()
            # state_dict() hands back references to the live parameters, which later
            # optimizer steps overwrite in place; clone them to keep a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    # 6. Load best model (absent only if the validation loss never improved)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # 7. Final evaluation
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        preds_scaled = model(X_val_tensor).cpu().numpy()
        targets_scaled = y_val_seq

    # Undo the target scaling so the metrics are in the original units.
    preds = scaler_y.inverse_transform(preds_scaled)
    targets = scaler_y.inverse_transform(targets_scaled)

    # 8. Metrics (all forecast steps pooled together)
    mae = mean_absolute_error(targets.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(targets.flatten(), preds.flatten()))
    mape = mean_absolute_percentage_error(targets.flatten(), preds.flatten()) * 100

    return mae, rmse, mape

In [None]:
# Baseline two-layer CNN: expanding-window CV with a fixed configuration.
sequence_len = 30
horizon = 14
initial_train_size = 1095

mae_scores, rmse_scores, mape_scores = [], [], []

for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(
        X, y,
        initial_train_size=initial_train_size,
        horizon=horizon,
        step=horizon,
        sequence_len=sequence_len,
    )
):
    print(f"\n Fold {fold+1}")
    mae, rmse, mape = train_one_cnn_fold(
        X_tr, y_tr, X_val, y_val,
        sequence_len=sequence_len,
        num_epochs=50, patience=5,
        batch_size=32, lr=0.001,
        num_filters1=64, kernel_size1=3,
        num_filters2=32, kernel_size2=2,
        dropout=0.2,
    )

    # A None result marks a fold that could not be evaluated; leave it out.
    if mae is not None:
        print(f" MAE: {mae:.4f} | RMSE: {rmse:.4f} | MAPE: {mape:.4f}")
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

# --- Final CV results: unweighted mean over the evaluated folds ---
print("\n Final CNN CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

Hyperparameter tuning

In [None]:
import itertools

# Candidate values for each two-layer CNN hyperparameter.
num_filters1_list = [64, 128]
num_filters2_list = [32, 64]
kernel_size1_list = [2, 3]
kernel_size2_list = [2, 3]
dropouts = [0.0, 0.1, 0.2]
learning_rates = [0.001]
batch_sizes = [32]

# Every combination, as (nf1, nf2, ks1, ks2, dropout, lr, batch_size) tuples.
param_grid = [
    *itertools.product(
        num_filters1_list, num_filters2_list,
        kernel_size1_list, kernel_size2_list,
        dropouts, learning_rates, batch_sizes,
    )
]

print(f"Total combinations: {len(param_grid)}")

In [None]:
results = []

# Cross-validation settings shared by every configuration.
sequence_len = 30
horizon = 14
initial_train_size = 1095
step = horizon

# Grid search: one expanding-window CV run per entry of `param_grid`.
for i, (nf1, nf2, ks1, ks2, dropout, lr, batch_size) in enumerate(param_grid):
    print(f"\n🔧 Config {i+1}/{len(param_grid)} → "
          f"nf1={nf1}, nf2={nf2}, ks1={ks1}, ks2={ks2}, dropout={dropout}, lr={lr}, batch={batch_size}")

    mae_scores, rmse_scores, mape_scores = [], [], []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=initial_train_size,
                            horizon=horizon, step=step, sequence_len=sequence_len)
    ):
        mae, rmse, mape = train_one_cnn_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=sequence_len,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            num_filters1=nf1,
            num_filters2=nf2,
            kernel_size1=ks1,
            kernel_size2=ks2,
            dropout=dropout
        )

        # Folds that report mae=None are left out of the averages.
        if mae is None:
            continue

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average fold results. np.mean([]) would emit a RuntimeWarning and return NaN,
    # so the "no usable fold" case is handled explicitly and reported instead.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("No valid folds for this config; recording NaN metrics.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

    results.append({
        "filters1": nf1,
        "filters2": nf2,
        "kernel1": ks1,
        "kernel2": ks2,
        "dropout": dropout,
        "lr": lr,
        "batch": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

In [None]:
import pandas as pd

# Rank the two-layer CNN configurations from lowest to highest cross-validated RMSE.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)

# Show the ranked table (best configurations first).
df_results

TCN

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Chomp1d(nn.Module):
    """Removes padding from the end to maintain causality.

    Args:
        chomp_size: Number of trailing time steps to drop from the last dimension.
            Zero (e.g. the padding produced by a kernel of size 1) leaves the
            input untouched.
    """
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return `x` ([batch, channels, time]) without its last `chomp_size` steps."""
        # `x[:, :, :-0]` is `x[:, :, :0]`, i.e. an empty tensor, so a zero chomp
        # has to be treated as a no-op rather than sliced.
        if self.chomp_size == 0:
            return x
        return x[:, :, :-self.chomp_size]  # Remove last chomp_size elements


class TemporalBlock(nn.Module):
    """Residual block of two dilated convolutions, each followed by chomp, ReLU and dropout.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size, stride, dilation, padding: Passed to both `nn.Conv1d` layers;
            `padding` is also the number of trailing steps each `Chomp1d` removes.
        dropout: Dropout probability used after each activation.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, padding, dropout):
        super().__init__()
        # Both convolutions share the same geometry.
        conv_geometry = dict(stride=stride, padding=padding, dilation=dilation)

        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, **conv_geometry)
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, **conv_geometry)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # A 1x1 convolution aligns the skip path only when the channel counts differ.
        if in_channels != out_channels:
            self.downsample = nn.Conv1d(in_channels, out_channels, 1)
        else:
            self.downsample = None
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply both convolution stages and add the (possibly projected) input."""
        branch = self.dropout1(self.relu1(self.chomp1(self.conv1(x))))
        branch = self.dropout2(self.relu2(self.chomp2(self.conv2(branch))))

        shortcut = self.downsample(x) if self.downsample is not None else x
        return self.relu(branch + shortcut)


class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2):
        """
        Stack one TemporalBlock per entry of num_channels; level i uses dilation 2**i.

        num_inputs: number of input features
        num_channels: list of output channels for each TCN layer (e.g. [64, 64])
        kernel_size: kernel width shared by every block
        dropout: dropout probability shared by every block
        """
        super().__init__()
        # Channel widths along the stack: the input features, then each level's output.
        widths = [num_inputs, *num_channels]

        blocks = []
        for level, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
            dilation = 2 ** level
            blocks.append(
                TemporalBlock(fan_in, fan_out, kernel_size, stride=1,
                              dilation=dilation,
                              padding=(kernel_size - 1) * dilation,
                              dropout=dropout)
            )
        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Map [batch, seq_len, features] to the last-time-step features [batch, channels]."""
        channels_first = x.permute(0, 2, 1)  # [batch, features, seq_len]
        encoded = self.network(channels_first)
        return encoded[:, :, -1]  # Use last timestep


class TCNForecastNet(nn.Module):
    """TemporalConvNet encoder followed by a linear forecasting head.

    Args:
        input_size: Number of features per time step.
        num_channels: Output channels of each TCN level; the last entry sets the
            width of the linear head. The default list is only read, never mutated.
        kernel_size: Kernel width forwarded to the TCN.
        dropout: Dropout probability forwarded to the TCN.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, num_channels=[64, 64], kernel_size=3, dropout=0.2, output_size=14):
        super().__init__()
        self.tcn = TemporalConvNet(
            num_inputs=input_size,
            num_channels=num_channels,
            kernel_size=kernel_size,
            dropout=dropout,
        )
        head_width = num_channels[-1]
        self.fc = nn.Linear(head_width, output_size)

    def forward(self, x):
        """Encode `x` with the TCN, then project the result to the forecast."""
        return self.fc(self.tcn(x))

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

class TCNForecastDataset(Dataset):
    """Dataset pairing each input window with its target, both stored as float32 tensors."""

    def __init__(self, X, y):
        # Convert once up front so indexing later is a plain tensor lookup.
        to_float32 = lambda values: torch.tensor(values, dtype=torch.float32)
        self.X = to_float32(X)
        self.y = to_float32(y)

    def __len__(self):
        """Number of samples (length of the first dimension of X)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (window, target) pair stored at position `idx`."""
        window = self.X[idx]
        target = self.y[idx]
        return window, target


def create_tcn_input(X_scaled, y_scaled, sequence_len=30):
    """
    Slice a feature matrix into fixed-length windows paired with target rows.

    For every position `stop` in [sequence_len, len(X_scaled)) the window is
    rows stop-sequence_len .. stop-1 of `X_scaled` (row `stop` itself is not
    included) and the paired target is `y_scaled[stop]`.

    Returns (windows, targets) as numpy arrays. Both are empty arrays when
    `X_scaled` has no more than `sequence_len` rows.
    """
    stops = range(sequence_len, len(X_scaled))
    windows = [X_scaled[stop - sequence_len:stop] for stop in stops]
    targets = [y_scaled[stop] for stop in stops]
    return np.array(windows), np.array(targets)


def train_one_tcn_fold(X_train, y_train, X_val, y_val,
                       sequence_len=30, num_epochs=50, patience=5,
                       batch_size=32, lr=0.001,
                       num_channels=[64, 64], kernel_size=3, dropout=0.2):
    """
    Train a TCNForecastNet on one fold and report error metrics on its validation data.

    Features and targets are standardised with scalers fitted on the training
    split only, cut into sliding windows with `create_tcn_input`, and the model
    is trained with Adam and MSE loss. Training stops early once the validation
    loss has failed to improve by more than 1e-4 for `patience` consecutive
    epochs; the weights of the best epoch are restored before scoring.

    Parameters
    ----------
    X_train, X_val : 2-D feature arrays (one row per time step).
    y_train, y_val : 2-D target arrays; `y_train.shape[1]` sets the model's output size.
    sequence_len : number of rows in each input window.
    num_epochs : maximum number of training epochs.
    patience : epochs without improvement tolerated before stopping.
    batch_size, lr : DataLoader batch size and Adam learning rate.
    num_channels, kernel_size, dropout : forwarded to TCNForecastNet.

    Returns
    -------
    (mae, rmse, mape) computed on inverse-transformed predictions and targets,
    flattened over all validation windows and output steps; `mape` is in percent.
    (None, None, None) when the validation split yields no windows.

    NOTE: the same validation data drives early stopping and the reported metrics.
    """
    # Local import: only needed to snapshot the best weights.
    import copy

    # --- 1. Scaling (statistics come from the training split only) ---
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_val_scaled = scaler_X.transform(X_val)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # --- 2. Create TCN inputs ---
    X_train_seq, y_train_seq = create_tcn_input(X_train_scaled, y_train_scaled, sequence_len)
    X_val_seq, y_val_seq = create_tcn_input(X_val_scaled, y_val_scaled, sequence_len)

    if len(X_val_seq) == 0:
        print("Skipping fold: not enough validation sequences.")
        return None, None, None

    # --- 3. Dataloaders ---
    train_loader = DataLoader(
        TCNForecastDataset(X_train_seq, y_train_seq), batch_size=batch_size, shuffle=False
    )
    val_loader = DataLoader(
        TCNForecastDataset(X_val_seq, y_val_seq), batch_size=batch_size, shuffle=False
    )

    # --- 4. Model Setup ---
    model = TCNForecastNet(
        input_size=X_train_seq.shape[2],
        num_channels=num_channels,
        kernel_size=kernel_size,
        dropout=dropout,
        output_size=y_train.shape[1]  # usually 14
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    # --- 5. Early stopping setup ---
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # Validation loss over the whole validation set (single pass over the loader).
        model.eval()
        with torch.no_grad():
            batch_preds, batch_targets = [], []
            for xb, yb in val_loader:
                batch_preds.append(model(xb.to(device)))
                batch_targets.append(yb.to(device))
            val_loss = criterion(torch.cat(batch_preds), torch.cat(batch_targets)).item()

        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            # state_dict() hands back references to the live parameter tensors,
            # which later optimizer steps overwrite in place. Deep-copy so the
            # snapshot really holds this epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    # --- 6. Load best model ---
    # best_model_state stays None only if no epoch produced a comparable loss
    # (e.g. num_epochs == 0 or NaN losses); the current weights are kept then.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # --- 7. Final evaluation ---
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val_seq, dtype=torch.float32).to(device)
        preds_scaled = model(X_val_tensor).cpu().numpy()

    # Undo target scaling so the metrics are in the units of y.
    targets_scaled = y_val_seq
    preds = scaler_y.inverse_transform(preds_scaled)
    targets = scaler_y.inverse_transform(targets_scaled)

    mae = mean_absolute_error(targets.flatten(), preds.flatten())
    rmse = np.sqrt(mean_squared_error(targets.flatten(), preds.flatten()))
    mape = mean_absolute_percentage_error(targets.flatten(), preds.flatten()) * 100

    return mae, rmse, mape

In [None]:
# --- Baseline TCN run: expanding-window CV with one fixed configuration ---
sequence_len, horizon, initial_train_size = 30, 14, 1095

# Per-fold metrics, appended only for folds that actually produced a score.
mae_scores, rmse_scores, mape_scores = [], [], []

for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
    expanding_window_cv(
        X, y,
        initial_train_size=initial_train_size,
        horizon=horizon,
        step=horizon,
        sequence_len=sequence_len,
    )
):
    print(f"\nFold {fold+1}")
    mae, rmse, mape = train_one_tcn_fold(
        X_tr, y_tr, X_val, y_val,
        sequence_len=sequence_len, num_epochs=50, patience=5,
        batch_size=32, lr=0.001,
        num_channels=[64, 64], kernel_size=3, dropout=0.2,
    )

    # train_one_tcn_fold signals a skipped fold with None metrics.
    if mae is not None:
        print(f"  MAE: {mae:.4f} | RMSE: {rmse:.4f} | MAPE: {mape:.4f}")
        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

# --- Final Results: mean of each metric over the scored folds ---
print("\n Final TCN CV Results:")
print(f"Average MAE:  {np.mean(mae_scores):.4f}")
print(f"Average RMSE: {np.mean(rmse_scores):.4f}")
print(f"Average MAPE: {np.mean(mape_scores):.4f}")

Hyperparameter tuning (lr = 0.001)

In [None]:
from itertools import product

# Search space: every value list below is one axis of the grid.
num_channels_list = [[64, 64], [128, 64], [128, 128]]
kernel_sizes = [2, 3, 5]
dropouts = [0.0, 0.1, 0.2]
learning_rates = [0.001]
batch_sizes = [32]

# Cartesian product of all axes, in the order
# (channels, kernel_size, dropout, lr, batch_size).
tcn_param_grid = [
    *product(num_channels_list, kernel_sizes, dropouts, learning_rates, batch_sizes)
]
print(f"Total combinations: {len(tcn_param_grid)}")

In [None]:
# Grid search: run the full expanding-window CV once per configuration and
# record the fold-averaged metrics as one row of `tcn_results`.
tcn_results = []

for i, (channels, kernel_size, dropout, lr, batch_size) in enumerate(tcn_param_grid):
    print(f"\n Config {i+1}/{len(tcn_param_grid)} → channels={channels}, kernel={kernel_size}, dropout={dropout}, lr={lr}, batch={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_tcn_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=30,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            num_channels=channels,
            kernel_size=kernel_size,
            dropout=dropout
        )

        # train_one_tcn_fold returns None metrics for a skipped fold.
        if mae is None:
            continue

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average each metric once. If every fold was skipped, record NaN
    # explicitly instead of letting np.mean([]) emit a RuntimeWarning.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("  No fold produced metrics for this config; recording NaN.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    # Save results
    tcn_results.append({
        "channels": channels,
        "kernel_size": kernel_size,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Tabulate the grid-search rows, best (lowest) RMSE first, with a fresh 0..n-1 index.
df_tcn = (
    pd.DataFrame(tcn_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_tcn

Hyperparameter tuning (lr = 0.0005)

In [None]:
from itertools import product

# Search space for the second sweep: identical axes, lower learning rate.
num_channels_list = [[64, 64], [128, 64], [128, 128]]
kernel_sizes = [2, 3, 5]
dropouts = [0.0, 0.1, 0.2]
learning_rates = [0.0005]
batch_sizes = [32]

# Cartesian product of all axes, in the order
# (channels, kernel_size, dropout, lr, batch_size).
tcn_param_grid = [
    *product(num_channels_list, kernel_sizes, dropouts, learning_rates, batch_sizes)
]
print(f"Total combinations: {len(tcn_param_grid)}")

In [None]:
# Grid search: run the full expanding-window CV once per configuration and
# record the fold-averaged metrics as one row of `tcn_results`.
tcn_results = []

for i, (channels, kernel_size, dropout, lr, batch_size) in enumerate(tcn_param_grid):
    print(f"\n Config {i+1}/{len(tcn_param_grid)} → channels={channels}, kernel={kernel_size}, dropout={dropout}, lr={lr}, batch={batch_size}")

    mae_scores = []
    rmse_scores = []
    mape_scores = []

    for fold, (X_tr, y_tr, X_val, y_val) in enumerate(
        expanding_window_cv(X, y, initial_train_size=1095, horizon=14, step=14, sequence_len=30)
    ):
        mae, rmse, mape = train_one_tcn_fold(
            X_tr, y_tr, X_val, y_val,
            sequence_len=30,
            num_epochs=50,
            patience=5,
            batch_size=batch_size,
            lr=lr,
            num_channels=channels,
            kernel_size=kernel_size,
            dropout=dropout
        )

        # train_one_tcn_fold returns None metrics for a skipped fold.
        if mae is None:
            continue

        mae_scores.append(mae)
        rmse_scores.append(rmse)
        mape_scores.append(mape)

    # Average each metric once. If every fold was skipped, record NaN
    # explicitly instead of letting np.mean([]) emit a RuntimeWarning.
    if mae_scores:
        avg_mae = np.mean(mae_scores)
        avg_rmse = np.mean(rmse_scores)
        avg_mape = np.mean(mape_scores)
    else:
        print("  No fold produced metrics for this config; recording NaN.")
        avg_mae = avg_rmse = avg_mape = float("nan")

    # Save results
    tcn_results.append({
        "channels": channels,
        "kernel_size": kernel_size,
        "dropout": dropout,
        "lr": lr,
        "batch_size": batch_size,
        "MAE": avg_mae,
        "RMSE": avg_rmse,
        "MAPE": avg_mape
    })

    print(f"→ Avg MAE: {avg_mae:.4f} | RMSE: {avg_rmse:.4f} | MAPE: {avg_mape:.4f}")

In [None]:
import pandas as pd

# Tabulate the grid-search rows, best (lowest) RMSE first, with a fresh 0..n-1 index.
df_tcn = (
    pd.DataFrame(tcn_results)
    .sort_values(by="RMSE")
    .reset_index(drop=True)
)
df_tcn