In [6]:
# --- START OF FILE Notebook3GPU-Models.ipynb ---

#######################################################################
# 0. Environment set‑up                                               #
#######################################################################
# ☑️  Install all dependencies.
#    Ensure your environment has GPU support for PyTorch if running on GPU nodes.
#    Note: 'neuralforecast' usually pulls 'pytorch-lightning' and 'torch'.
# !pip install --quiet pandas pyarrow matplotlib seaborn numpy scikit-learn tqdm \
#     torch pytorch-lightning neuralforecast

import os
import warnings
import random
import gc
import itertools
import time
from pathlib import Path
from typing import List, Dict, Tuple, Optional

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm

# For Data Scaling
from sklearn.preprocessing import StandardScaler
# For Model Evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error

# PyTorch specific imports (for device management, though NeuralForecast handles much)
import torch

# For EarlyStopping in PyTorch Lightning (used by NeuralForecast internally)
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# For NeuralForecast models
from neuralforecast import NeuralForecast
from neuralforecast.models import PatchTST

# Suppress minor warnings for cleaner output in Jupyter
warnings.filterwarnings("ignore")

# Ensure plots appear inline in Jupyter Notebook
# %matplotlib inline

# Set plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 150 # Increase resolution for better quality plots
plt.rcParams['savefig.dpi'] = 300 # Save plots with higher resolution

# Seed for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Check for GPU availability
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

#######################################################################
# 1. Configuration                                                    #
#######################################################################

# --- Paths & File Names ---
# Input: Directory containing your INDIVIDUAL processed hourly Parquet files (from Notebook 1)
INPUT_PROCESSED_HOURLY_DIR = Path("outputs/ch4") 

# Output root directory for results (figures, metrics, predictions)
OUT_DIR           = Path("outputs/ch4").absolute(); OUT_DIR.mkdir(parents=True, exist_ok=True)
PREDICTIONS_DIR = OUT_DIR / "predictions"; PREDICTIONS_DIR.mkdir(exist_ok=True, parents=True)
METRICS_FILE_PATH = OUT_DIR / "all_model_results.csv" # Path to the aggregated metrics CSV
LOSS_PNG_DIR = OUT_DIR / "loss_curves"          # <─ new

# --- Data Specifics ---
TARGET_COL        = "in_packets"           # The volume metric (packets)
ROUTER_COL        = "router"               # Column for router name
ANOM_SCORE_COL    = "if_score"             
ANOM_FLAG_COL     = "if_flag"              

# --- Time Series Parameters ---
LOOKBACK          = 336                     # Sliding window length (hours)
HORIZONS          = [1, 6, 12, 24]         # Prediction horizons
TEST_SIZE_HOURS   = 7 * 24                 # 7 days for testing
VAL_SIZE_HOURS    = 7 * 24                 # 7 days for validation (used for early stopping)

# --- Model Training Parameters ---
BATCH_SIZE        = 128
EPOCHS            = 400                    # Total epochs for training (subject to early stopping)
EARLY_STOP_PATIENCE =20                   # Patience for early stopping
LEARNING_RATE     = 1e-3
NF_MAX_STEPS      = EPOCHS                 # NeuralForecast uses max_steps instead of epochs
NF_VALIDATION_STEPS = 1 # Number of steps to check validation. Set to 1 for per-epoch check or longer for faster runs.
# NF_VAL_SIZE_FRACTION = 0.2 # This isn't directly used for the callbacks in NeuralForecast with 'val_check_steps' set to 1.
# NF uses `val_check_steps` if `val_loader` is present, or `limit_train_batches` if not.
# For simple EarlyStopping on `train_loss`, a dedicated validation set is not passed to NF.

# --- NeuralForecast specific parameters ---\
NF_LOOKBACK_SIZE = LOOKBACK 
NF_MODEL_MAX_HORIZON = max(HORIZONS) 
FREQ              = "h"                    # Hourly frequency for NeuralForecast

print(f"Configuration loaded.")
print(f"Input processed hourly data from: {INPUT_PROCESSED_HOURLY_DIR}")
print(f"Outputs will be saved to: {OUT_DIR} (predictions: {PREDICTIONS_DIR}, metrics: {METRICS_FILE_PATH})")
print(f"Time series lookback: {LOOKBACK} hours, horizons: {HORIZONS}")
print(f"Training Epochs (NeuralForecast max_steps): {NF_MAX_STEPS}, Early Stop Patience: {EARLY_STOP_PATIENCE}")
print(f"Using device: {DEVICE}")

#######################################################################
# 2. Evaluation Metrics (Expanded)                                    #
#######################################################################

def calculate_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """
    Calculates MAE, RMSE, sMAPE, MSLE, and P99 Absolute Error.
    Ensures non-negative inputs for relevant metrics to avoid errors.
    """
    # Ensure y_true and y_pred are non-negative for log/percentage metrics
    y_true_safe = np.maximum(y_true, 0)
    y_pred_safe = np.maximum(y_pred, 0)
    
    mae = mean_absolute_error(y_true_safe, y_pred_safe)
    rmse = np.sqrt(mean_squared_error(y_true_safe, y_pred_safe))
    
    # sMAPE: Symmetric Mean Absolute Percentage Error
    # Add a small epsilon to the denominator to prevent division by zero
    smape_denominator = (np.abs(y_true_safe) + np.abs(y_pred_safe))
    if np.sum(smape_denominator) == 0:
        smape = 0.0
    else:
        smape = np.mean(2 * np.abs(y_pred_safe - y_true_safe) / (smape_denominator + 1e-8)) * 100
    
    # MSLE: Mean Squared Logarithmic Error
    # Add epsilon before log to prevent log(0) and handle cases where all values are zero
    if np.all(y_true_safe == 0) and np.all(y_pred_safe == 0):
        msle = 0.0 # If both are consistently zero, error is zero
    else:
        msle = mean_squared_log_error(y_true_safe + 1e-8, y_pred_safe + 1e-8) 
    
    # P99 Absolute Error
    p99_abs_error = np.percentile(np.abs(y_true_safe - y_pred_safe), 99)
    
    return {
        "MAE": mae,
        "RMSE": rmse,
        "sMAPE": smape,
        "MSLE": msle,
        "P99_abs_error": p99_abs_error
    }

# ─── Lightning callback: record loss each epoch & save a PNG ───
class LossCurveSaver(pl.callbacks.Callback):
    """Collects train_loss each epoch and writes loss_<variant>.png when fit() ends."""
    def __init__(self, variant_key: str, out_dir: Path):
        super().__init__()
        self.variant_key = variant_key
        self.out_dir     = out_dir
        self.history     = []

    def on_train_epoch_end(self, trainer, pl_module):
        # Lightning puts the running average in 'train_loss'
        loss = trainer.callback_metrics.get("train_loss_epoch",
                                            trainer.callback_metrics.get("train_loss"))
        if loss is not None:
            self.history.append(float(loss))

    def on_train_end(self, trainer, pl_module):
        if not self.history:
            return
        self.out_dir.mkdir(parents=True, exist_ok=True)
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(self.history, lw=2)
        plt.title(self.variant_key)
        plt.xlabel("epoch"); plt.ylabel("train loss")
        plt.tight_layout()
        png = self.out_dir / f"loss_{self.variant_key}.png"
        plt.savefig(png, dpi=200)
        plt.close()
        print(f"· loss curve saved → {png.relative_to(Path.cwd())}")
# ----------------------------------------------------------------


#######################################################################
# 3. Model Experiment Runner for NeuralForecast Models                #
#######################################################################

def run_nf_experiment(model_type: str, df_full_scaled: pd.DataFrame, train_df: pd.DataFrame,
                      test_df: pd.DataFrame, target_scaler: StandardScaler,
                      router_label: str, exp_config: Dict, all_results: List[Dict], freq_param: str):
    """
    Runs a NeuralForecast model (PatchTST) with anomaly integration strategies.
    `df_full_scaled` is the complete time series with scaled features, including anomalies, for cross-validation.
    `train_df` is the potentially masked/modified training split.
    """
    exp_name = exp_config['name'].replace("NF", model_type)
    print(f"\n    --- Running NeuralForecast Experiment: {exp_name} ({model_type}) ---")

    # Prepare training DataFrame for NeuralForecast (handling masking)
    nf_train_df = train_df.copy() 
    if exp_config['mask_out']:
        initial_rows = len(nf_train_df)
        nf_train_df = nf_train_df[nf_train_df[ANOM_FLAG_COL] == 0].copy()
        print(f"    Masked out {initial_rows - len(nf_train_df)} anomalous periods for training.")
    
    
    # callback objects -------------------------------------------------
    variant_key   = f"{model_type}_{exp_name}"
    curve_cb      = LossCurveSaver(variant_key, LOSS_PNG_DIR)   # <─ new
    early_stop_cb = EarlyStopping(monitor="train_loss",
                                  patience=EARLY_STOP_PATIENCE,
                                  mode="min",
                                  check_on_train_epoch_end=True)
    # -----------------------------------------------------------------
    
    # Prepare `nf_train_df` columns for NeuralForecast
    nf_train_df = nf_train_df.reset_index().rename(columns={nf_train_df.index.name: "ds", TARGET_COL + '_scaled': "y"})
    nf_train_df['unique_id'] = router_label
    
    # exogenous_features = []
    # # Only add exogenous features if use_score is True (PatchTST supports them)
    # if exp_config['use_score']: 
    #     exogenous_features.append(ANOM_SCORE_COL + '_scaled')
    
    # ---- inside run_nf_experiment before building the model ----
    if exp_config['use_score']:
        print(f"WARNING: {model_type} does not support historical exogenous vars. "
              f"Skipping experiment {exp_name}.")
        return            # <- just exit this experiment
    exogenous_features = []        # leave list empty
        
    nf_train_df = nf_train_df[['unique_id', 'ds', 'y'] + exogenous_features]

    # Check if training data is empty after masking or for other reasons
    if nf_train_df.empty:
        print(f"    Warning: Training data for {exp_name} is empty. Skipping training and evaluation.")
        return

    # Define model instance for PatchTST
    patchtst_model_instance = PatchTST(
        h=NF_MODEL_MAX_HORIZON, 
        input_size=NF_LOOKBACK_SIZE,
        # hist_exog_list=exogenous_features if exogenous_features else None, 
        max_steps=NF_MAX_STEPS, 
        learning_rate=LEARNING_RATE, 
        random_seed=SEED,
        scaler_type=None, # We use sklearn.StandardScaler externally
        # callbacks=[EarlyStopping(monitor="train_loss", patience=EARLY_STOP_PATIENCE, mode="min", check_on_train_epoch_end=True)],
        callbacks=[early_stop_cb, curve_cb],
        accelerator=str(DEVICE), # Convert torch.device to string 'cuda' or 'cpu'
        val_check_steps=NF_VALIDATION_STEPS, # How often to check validation loss
        batch_size=BATCH_SIZE,
        alias="PatchTST" 
    )

    nf_instance = NeuralForecast(models=[patchtst_model_instance], freq=freq_param) 

    print(f"    Fitting {model_type}...")
    start_time = time.time()
    nf_instance.fit(df=nf_train_df)
    train_duration = time.time() - start_time
    print(f"    {model_type} fitted. Duration: {train_duration:.2f} seconds.")

    # Perform cross-validation to get predictions for the test set
    # `df_full_for_nf` must include all historical data up to the end of the test set,
    # and all necessary features (target and exogenous).
    df_full_for_nf = df_full_scaled.reset_index().rename(columns={df_full_scaled.index.name: "ds", TARGET_COL + '_scaled': "y"})
    df_full_for_nf['unique_id'] = router_label
    
    nf_df_cols_full = ['unique_id', 'ds', 'y'] + (exogenous_features if exogenous_features else [])
    df_full_for_nf = df_full_for_nf[nf_df_cols_full]

    start_time = time.time()
    # NeuralForecast's cross_validation predicts for horizons from the end of each window.
    # It will automatically manage the sliding window for prediction.
    nf_predictions_df = nf_instance.cross_validation(
        df=df_full_for_nf,
        val_size=VAL_SIZE_HOURS, # Validation size for splitting data for cross-validation
        test_size=TEST_SIZE_HOURS, # Test size for final evaluation window
        n_windows=None  #Only one forecast window for the test set
    )
    
        # keep only this router and guarantee integer horizon
    # keep only this router
    nf_predictions_df = nf_predictions_df.query("unique_id == @router_label").copy()

    # ------------------------------------------------------------------
    # (NEW) rebuild horizon if the column is missing
    if 'horizon' not in nf_predictions_df.columns:
        # make sure we’re dealing with datetimes
        nf_predictions_df['cutoff'] = pd.to_datetime(nf_predictions_df['cutoff'])
        nf_predictions_df['ds']     = pd.to_datetime(nf_predictions_df['ds'])

        # horizon in hours: (forecast timestamp – cutoff timestamp)
        nf_predictions_df['horizon'] = (
            (nf_predictions_df['ds'] - nf_predictions_df['cutoff'])
            .dt.total_seconds()
            .div(3600)
            .astype(int)
        )
    # ------------------------------------------------------------------

    # (always) coerce to int so the equality test below works
    nf_predictions_df['horizon'] = nf_predictions_df['horizon'].astype(int)
    #     # ----------------------- NEW / FIX --------------------------------
    # # robust horizon reconstruction and dtype coercion
    # nf_predictions_df['cutoff'] = pd.to_datetime(nf_predictions_df['cutoff'])
    # nf_predictions_df['ds']     = pd.to_datetime(nf_predictions_df['ds'])
    # nf_predictions_df['horizon'] = (
    #         (nf_predictions_df['ds'] - nf_predictions_df['cutoff'])
    #         /  np.timedelta64(1, "h")
    # ).round().astype(int)                 # ★★ int dtype guaranteed
    # nf_predictions_df = nf_predictions_df.query("horizon >= 1")  # keep 1..h
    # # ------------------------------------------------------------------
    
    inference_duration = time.time() - start_time
    print(f"    {model_type} prediction done. Duration: {inference_duration:.2f} seconds.")

    # Re-create the 'horizon' column manually for NF_predictions_df if needed
    if 'horizon' not in nf_predictions_df.columns:
        print("  'horizon' column not found in NF predictions. Recreating it.")
        nf_predictions_df['cutoff'] = pd.to_datetime(nf_predictions_df['cutoff'])
        nf_predictions_df['ds'] = pd.to_datetime(nf_predictions_df['ds'])
        # Calculate horizon in hours
        nf_predictions_df['horizon'] = ((nf_predictions_df['ds'] - nf_predictions_df['cutoff']).dt.total_seconds() // 3600).astype(int)
        # Filter out horizon <= 0 (historical/current values) if any
        nf_predictions_df = nf_predictions_df[nf_predictions_df['horizon'] > 0].copy()
        print("  Horizon column recreated — sample values:", np.sort(nf_predictions_df["horizon"].unique()[:10]))


    # Evaluate and store results, and save predictions for later plotting
    for horizon in HORIZONS:
        nf_preds_for_horizon = nf_predictions_df[nf_predictions_df['horizon'] == horizon]
        
        y_true_original_h = np.array([])
        y_pred_original_h = np.array([])
        m = {k: np.nan for k in ["MAE", "RMSE", "sMAPE", "MSLE", "P99_abs_error"]} # Initialize metrics to NaN

        if nf_preds_for_horizon.empty:
            print(f"    No predictions available for Horizon {horizon}h after filtering. Setting metrics to NaN.")
        else:
            y_true_scaled_h = nf_preds_for_horizon['y'].values
            y_pred_scaled_h = nf_preds_for_horizon[model_type].values # Predictions are in a column named after the model_type
            
            y_true_original_h = target_scaler.inverse_transform(y_true_scaled_h.reshape(-1, 1)).flatten()
            y_pred_original_h = target_scaler.inverse_transform(y_pred_scaled_h.reshape(-1, 1)).flatten()
            
            m = calculate_metrics(y_true_original_h, y_pred_original_h)

        # Save predictions for this horizon
        if y_pred_original_h.size > 0 and y_true_original_h.size > 0: # Check for non-empty arrays
            predictions_df = pd.DataFrame({
                'ds': nf_preds_for_horizon['ds'].values, # Timestamps from NF's output
                'y_true': y_true_original_h,
                'y_pred': y_pred_original_h
            })
            predictions_path = PREDICTIONS_DIR / f"{model_type}_{exp_name}_H{horizon}_predictions_{router_label}.parquet"
            predictions_df.to_parquet(predictions_path, index=False)
            print(f"      Saved predictions for {exp_name} H{horizon} to {predictions_path.name}")
        else:
            print(f"      Skipping saving predictions for {exp_name} H{horizon} due to empty or mis-aligned data.")

        print(f"    Horizon {horizon}h Metrics: MAE={m['MAE']:.2f}, RMSE={m['RMSE']:.2f}, sMAPE={m['sMAPE']:.2f}%, MSLE={m['MSLE']:.4f}, P99_abs_error={m['P99_abs_error']:.2f}")
        
        result_row = {
            "model": model_type,
            "experiment": exp_name,
            "horizon": horizon,
            "router": router_label,
            "train_time_sec": train_duration,
            "inference_time_sec": inference_duration,
            **m
        }
        all_results.append(result_row)
    
    # Clean up memory
    del patchtst_model_instance, nf_instance, nf_train_df, df_full_for_nf, nf_predictions_df
    gc.collect()


#######################################################################
# 4. Main Execution for Notebook 3                                    #
#######################################################################

def main():
    print("\n--- Initiating Notebook 3: GPU-Friendly Model Training & Evaluation (PatchTST) ---")

    # 1. Load all individual processed hourly Parquet files
    print(f"\nLoading all processed hourly data from: {INPUT_PROCESSED_HOURLY_DIR}/*.parquet")
    all_processed_hourly_files = sorted(list(INPUT_PROCESSED_HOURLY_DIR.glob("hourly_*_processed_with_anomalies.parquet")))
    
    if not all_processed_hourly_files:
        print(f"Error: No processed hourly Parquet files found in {INPUT_PROCESSED_HOURLY_DIR}. Please run Notebook 1 first.")
        return

    # Load all into a single Pandas DataFrame (feasible as these are small files)
    list_df_hourly = []
    for file_path in tqdm(all_processed_hourly_files, desc="Loading hourly files"):
        try:
            df = pd.read_parquet(file_path)
            # Ensure router column is present and correct (it should be from Notebook 1)
            router_label_from_file = file_path.stem.replace('hourly_', '').replace('_processed_with_anomalies', '')
            if ROUTER_COL not in df.columns:
                df[ROUTER_COL] = router_label_from_file
            list_df_hourly.append(df)
        except Exception as e:
            print(f"Warning: Could not load {file_path.name}: {e}. Skipping this file.")
            continue
    
    if not list_df_hourly:
        print("No valid hourly processed data loaded. Exiting.")
        return

    df_combined_hourly = pd.concat(list_df_hourly, ignore_index=False) # Keep original index (timestamps)
    print(f"Loaded combined hourly data: {len(df_combined_hourly)} rows from {len(list_df_hourly)} routers.")
    del list_df_hourly # Free memory
    gc.collect()

    # Get unique routers for iteration
    unique_routers = df_combined_hourly[ROUTER_COL].unique().tolist()
    unique_routers.sort()
    print(f"Found {len(unique_routers)} unique routers: {unique_routers}")

    # Initialize master results list
    all_results = []

    # Try to load existing results to append to them
    if METRICS_FILE_PATH.exists():
        try:
            existing_results_df = pd.read_csv(METRICS_FILE_PATH)
            all_results.extend(existing_results_df.to_dict('records'))
            print(f"Loaded {len(existing_results_df)} existing results from {METRICS_FILE_PATH}.")
        except Exception as e:
            print(f"Warning: Could not load existing metrics file {METRICS_FILE_PATH}: {e}. Starting with empty results.")
    else:
        print(f"No existing metrics file found at {METRICS_FILE_PATH}. Starting with empty results.")


    # Iterate through each router for processing
    for router_idx, router_label in enumerate(tqdm(unique_routers, desc="Processing Routers for Modeling (PatchTST)")):
        print(f"\n==== Starting Model Training & Evaluation for Router: {router_label} ({router_idx + 1}/{len(unique_routers)}) ====")
        
        # Filter data for current router
        df_hourly_processed_single_router = df_combined_hourly[df_combined_hourly[ROUTER_COL] == router_label].copy()
        
        if df_hourly_processed_single_router.empty:
            print(f"  Warning: No hourly data found for {router_label}. Skipping this router.")
            continue

        # Safeguard: Ensure no NaNs in critical columns
        initial_rows = len(df_hourly_processed_single_router)
        df_hourly_processed_single_router.dropna(subset=[TARGET_COL, ANOM_SCORE_COL], inplace=True)
        if len(df_hourly_processed_single_router) < initial_rows:
            print(f"  Dropped {initial_rows - len(df_hourly_processed_single_router)} rows with NaNs in target/anomaly columns for {router_label}.")
        
        # Time Series Splitting
        print("\n  --- Splitting Data into Train/Validation/Test Sets ---")
        total_len = len(df_hourly_processed_single_router)
        
        min_data_required = TEST_SIZE_HOURS + VAL_SIZE_HOURS + LOOKBACK + max(HORIZONS)
        if total_len < min_data_required:
            print(f"  Warning: Not enough data for requested split sizes and lookback/horizons for {router_label}. Total: {total_len} hours. Needed at least: {min_data_required} hours. Skipping this router.")
            continue

        test_start_idx = total_len - TEST_SIZE_HOURS
        val_start_idx = test_start_idx - VAL_SIZE_HOURS 
        
        # Train, Validation, Test splits for SCALING and for NF cross-validation
        train_df = df_hourly_processed_single_router.iloc[:val_start_idx].copy()
        val_df = df_hourly_processed_single_router.iloc[val_start_idx:test_start_idx].copy()
        test_df = df_hourly_processed_single_router.iloc[test_start_idx:].copy()

        # Combine train, val, test for full scaled series needed by NeuralForecast cross_validation
        df_full_scaled_router = pd.concat([train_df, val_df, test_df], axis=0) # Concatenate only for current router

        print(f"  Train set size: {len(train_df)} hours (from {train_df.index.min()} to {train_df.index.max()})\n"
              f"  Validation set size: {len(val_df)} hours (from {val_df.index.min()} to {val_df.index.max()})\n"
              f"  Test set size: {len(test_df)} hours (from {test_df.index.min()} to {test_df.index.max()})")


        # Data Scaling
        print("\n  --- Scaling Time Series Data ---")
        target_scaler = StandardScaler()
        # Fit scaler on training data, transform all splits
        df_full_scaled_router[TARGET_COL + '_scaled'] = target_scaler.fit_transform(df_full_scaled_router[[TARGET_COL]])

        anomaly_score_scaler = StandardScaler()
        # Fit scaler on training data, transform all splits
        df_full_scaled_router[ANOM_SCORE_COL + '_scaled'] = anomaly_score_scaler.fit_transform(df_full_scaled_router[[ANOM_SCORE_COL]])
        print("  Data scaled successfully using StandardScaler.")

        # Re-derive train_df, val_df, test_df from scaled df_full_scaled_router
        train_df_scaled = df_full_scaled_router.iloc[:val_start_idx].copy()
        val_df_scaled = df_full_scaled_router.iloc[val_start_idx:test_start_idx].copy()
        test_df_scaled = df_full_scaled_router.iloc[test_start_idx:].copy()


        # --- Define PatchTST Experiments configurations ---
        patchtst_experiments_configs = [
            {"name": "PatchTST_raw",                "mask_out": False, "use_score": False},
            {"name": "PatchTST_mask_out",           "mask_out": True,  "use_score": False},
            {"name": "PatchTST_score_as_feature",   "mask_out": False, "use_score": True},
            {"name": "PatchTST_both",               "mask_out": True,  "use_score": True}
        ]

        # --- Loop through each PatchTST experiment for the current router ---
        # This is the corrected loop that defines `exp`
        for exp in patchtst_experiments_configs: 
            run_nf_experiment(
                model_type="PatchTST",
                df_full_scaled=df_full_scaled_router, # Pass the router-specific combined scaled df
                train_df=train_df_scaled, # Pass the scaled train_df
                test_df=test_df_scaled,   # Pass the scaled test_df
                target_scaler=target_scaler,
                router_label=router_label,
                exp_config=exp, # `exp` is now correctly defined here
                all_results=all_results,
                freq_param=FREQ
            )
            # gc.collect() # run_nf_experiment already cleans up its internal DFs. No need for an extra gc here.
        
        # Clean up router-specific DataFrames after all experiments for this router are done
        del df_hourly_processed_single_router, train_df, val_df, test_df, df_full_scaled_router, \
            train_df_scaled, val_df_scaled, test_df_scaled
        gc.collect()
    
    # --- Final Save of all results ---
    final_results_df = pd.DataFrame(all_results)
    # Drop duplicates to prevent re-runs from adding redundant rows
    final_results_df.drop_duplicates(
        subset=['model', 'experiment', 'horizon', 'router'], 
        inplace=True
    )
    print(f"\nRemoved {len(all_results) - len(final_results_df)} duplicate result rows.")

    final_results_df.to_csv(METRICS_FILE_PATH, index=False)
    print(f"\nAll model results (including PatchTST) saved to: {METRICS_FILE_PATH}")

    print("\n--- Notebook 3 Complete: GPU-Friendly Model Training & Evaluation (PatchTST) ---")

# Execute the main pipeline for this notebook
if __name__ == "__main__":
    main()

# --- END OF FILE Notebook3GPU-Models.ipynb ---

Using device: cuda
Configuration loaded.
Input processed hourly data from: outputs/ch4
Outputs will be saved to: /home/ramamurthy/mhnarfth/network_analysis/individually process kora/outputs/ch4 (predictions: /home/ramamurthy/mhnarfth/network_analysis/individually process kora/outputs/ch4/predictions, metrics: /home/ramamurthy/mhnarfth/network_analysis/individually process kora/outputs/ch4/all_model_results.csv)
Time series lookback: 336 hours, horizons: [1, 6, 12, 24]
Training Epochs (NeuralForecast max_steps): 400, Early Stop Patience: 20
Using device: cuda

--- Initiating Notebook 3: GPU-Friendly Model Training & Evaluation (PatchTST) ---

Loading all processed hourly data from: outputs/ch4/*.parquet


Loading hourly files: 100%|██████████| 10/10 [00:00<00:00, 572.63it/s]

Loaded combined hourly data: 12604 rows from 10 routers.





Found 10 unique routers: ['atlanta', 'batonrouge', 'boston', 'dallas', 'elpaso', 'jackson', 'jacksonville', 'louisville', 'phoenix', 'reno']
Loaded 480 existing results from /home/ramamurthy/mhnarfth/network_analysis/individually process kora/outputs/ch4/all_model_results.csv.


Processing Routers for Modeling (PatchTST):   0%|          | 0/10 [00:00<?, ?it/s][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



==== Starting Model Training & Evaluation for Router: atlanta (1/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1033 hours (from 2021-10-07 23:00:00 to 2021-11-19 23:00:00)
  Validation set size: 168 hours (from 2021-11-20 00:00:00 to 2021-11-26 23:00:00)
  Test set size: 168 hours (from 2021-11-27 00:00:00 to 2021-12-03 23:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.85it/s, v_num=1.06e+7, train_loss_step=0.928]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 585.39it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 18.59it/s, v_num=1.06e+7, train_loss_step=1.460, train_loss_epoch=0

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 14.54 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.37it/s, v_num=1.06e+7, train_loss_step=0.204]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 167.77it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.98it/s, v_num=1.06e+7, train_loss_step=0.204, valid_loss=0.587, train_loss_epoch=0.204]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.26it/s, v_num=1.06e+7, train_loss_step=0.204, valid_loss=0.587, train_loss_epoch=0.204]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 179.35it/s]
    PatchTST prediction done. Duration: 0.40 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_atlanta.parquet
    Horizon 1h Metrics: MAE=158907101.05, RMSE=277169674.89, sMAPE=97.45%, MSLE=76.9655, P99_abs_error=827613574.72
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_atlanta.parquet
    Horizon 6h Metrics: MAE=164689274.06, RMSE=211702814.44, sMAPE=101.17%, MSLE=52.1402, P99_abs_error=448723319.04
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_atlanta.parquet
    Horizon 12h Metrics: MAE=157737521.33, RMSE=200618928.70, sMAPE=106.39%, MSLE=71.5044, P99_abs_error=495606060.80
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_atlanta.parquet
    Horizon 24h Metrics: MAE=146288352.30, RMSE=180987668.23, sMAPE=99.60%, MSLE=19.2687, P99_abs_error=543133663.36


[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 13 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 22.82it/s, v_num=1.06e+7, train_loss_step=0.878]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 580.29it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.62it/s, v_num=1.06e+7, train_loss_step=1.360, train_loss_epoch=0.878]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 595.44it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.60it/s, v_num=1.06e+7, train_loss_step=0.601, train_loss_epoch=1.360]
Validation: |          | 0/? [00:00<

`Trainer.fit` stopped: `max_steps=400` reached.


· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 399: 100%|██████████| 1/1 [00:00<00:00,  3.32it/s, v_num=1.06e+7, train_loss_step=0.131, train_loss_epoch=0.131]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev


    PatchTST fitted. Duration: 26.92 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.70it/s, v_num=1.06e+7, train_loss_step=0.330]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 83.21it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 22.64it/s, v_num=1.06e+7, train_loss_step=0.876, valid_loss=0.443, train_loss_epoch=0.330]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 83.49it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 22.79it/s, v_num=1.06e+7, train_loss_step=0.505, valid_loss=0.339, train_loss_epoch=0.876]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Vali

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 183.86it/s]
    PatchTST prediction done. Duration: 1.20 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_atlanta.parquet
    Horizon 1h Metrics: MAE=95317834.58, RMSE=143300480.74, sMAPE=62.33%, MSLE=10.4319, P99_abs_error=480696734.08
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_atlanta.parquet
    Horizon 6h Metrics: MAE=113060584.44, RMSE=158049195.13, sMAPE=68.70%, MSLE=1.5739, P99_abs_error=462745192.96
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_atlanta.parquet
    Horizon 12h Metrics: MAE=125867782.27, RMSE=168404855.78, sMAPE=80.13%, MSLE=2.1829, P99_abs_error=498618111.36
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_atlanta.parquet
    Horizon 24h Metrics: MAE=137088289.30, RMSE=182082822.26, sMAPE=92.70%, MSLE=6.3

Processing Routers for Modeling (PatchTST):  10%|█         | 1/10 [00:43<06:35, 43.99s/it][rank: 0] Seed set to 42



    --- Running NeuralForecast Experiment: PatchTST_score_as_feature (PatchTST) ---

    --- Running NeuralForecast Experiment: PatchTST_both (PatchTST) ---
    Masked out 13 anomalous periods for training.

==== Starting Model Training & Evaluation for Router: batonrouge (2/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1007 hours (from 2021-10-07 18:00:00 to 2021-11-18 16:00:00)
  Validation set size: 168 hours (from 2021-11-18 17:00:00 to 2021-11-25 16:00:00)
  Test set size: 168 hours (from 2021-11-25 17:00:00 to 2021-12-02 16:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.03it/s, v_num=1.06e+7, train_loss_step=0.335]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 596.88it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.68it/s, v_num=1.06e+7, train_loss_step=0.659, train_loss_epoch=0.335]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 595.19it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.65it/s, v_num=1.06e+7, train_loss_step=0.332, train_loss_epoch=0.659]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoad

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.45it/s, v_num=1.06e+7, train_loss_step=0.128]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 126.64it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 13.78it/s, v_num=1.06e+7, train_loss_step=0.128, valid_loss=0.0614, train_loss_epoch=0.128]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  3.88it/s, v_num=1.06e+7, train_loss_step=0.128, valid_loss=0.0614, train_loss_epoch=0.128]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 130.12it/s]
    PatchTST prediction done. Duration: 0.44 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_batonrouge.parquet
    Horizon 1h Metrics: MAE=179650148.55, RMSE=280670023.10, sMAPE=106.61%, MSLE=127.3632, P99_abs_error=1063540200.00
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_batonrouge.parquet
    Horizon 6h Metrics: MAE=164665853.41, RMSE=257465901.81, sMAPE=107.07%, MSLE=103.6718, P99_abs_error=974802808.32
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_batonrouge.parquet
    Horizon 12h Metrics: MAE=542891716.10, RMSE=1394380332.73, sMAPE=100.88%, MSLE=25.9231, P99_abs_error=6841084826.24
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_batonrouge.parquet
    Horizon 24h Metrics: MAE=321426997.80, RMSE=682100867.96, sMAPE=96.35%, MSLE=3.7018, P99_abs_erro

[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 14 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.51it/s, v_num=1.06e+7, train_loss_step=0.250]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 586.78it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 18.76it/s, v_num=1.06e+7, train_loss_step=0.609, train_loss_epoch=0.250]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 596.88it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 18.14it/s, v_num=1.06e+7, train_loss_step=0.274, train_loss_epoch=0.609]
Validation: |          | 0/? [00:00<

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 7.29 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.30it/s, v_num=1.06e+7, train_loss_step=0.125]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 165.20it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.79it/s, v_num=1.06e+7, train_loss_step=0.125, valid_loss=0.0516, train_loss_epoch=0.125]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.24it/s, v_num=1.06e+7, train_loss_step=0.125, valid_loss=0.0516, train_loss_epoch=0.125]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 178.27it/s]
    PatchTST prediction done. Duration: 0.40 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_batonrouge.parquet
    Horizon 1h Metrics: MAE=125939724.97, RMSE=249265225.21, sMAPE=58.55%, MSLE=55.3972, P99_abs_error=1063540200.00
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_batonrouge.parquet
    Horizon 6h Metrics: MAE=420188328.28, RMSE=1070601942.46, sMAPE=81.77%, MSLE=10.7222, P99_abs_error=5253532810.56
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_batonrouge.parquet
    Horizon 12h Metrics: MAE=145721664.50, RMSE=243821320.14, sMAPE=79.42%, MSLE=46.4320, P99_abs_error=945515566.08
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_batonrouge.parquet
    Horizon 24h Metrics: MAE=657133475.29, RMSE=1765948625.33, sMA

Processing Routers for Modeling (PatchTST):  20%|██        | 2/10 [01:00<03:41, 27.75s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: boston (3/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 453 hours (from 2021-10-07 23:00:00 to 2021-10-26 19:00:00)
  Validation set size: 168 hours (from 2021-10-26 20:00:00 to 2021-11-02 19:00:00)
  Test set size: 168 hours (from 2021-11-02 20:00:00 to 2021-11-09 19:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.86it/s, v_num=1.06e+7, train_loss_step=1.030]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 587.60it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 19.01it/s, v_num=1.06e+7, train_loss_step=1.300, train_loss_epoch=1.0

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.40it/s, v_num=1.06e+7, train_loss_step=0.156]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 169.63it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.99it/s, v_num=1.06e+7, train_loss_step=0.156, valid_loss=0.267, train_loss_epoch=0.156]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.45it/s, v_num=1.06e+7, train_loss_step=0.156, valid_loss=0.267, train_loss_epoch=0.156]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 183.47it/s]
    PatchTST prediction done. Duration: 0.39 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_dallas.parquet
    Horizon 1h Metrics: MAE=137211927.34, RMSE=162074274.82, sMAPE=151.67%, MSLE=171.1719, P99_abs_error=367205283.36
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_dallas.parquet
    Horizon 6h Metrics: MAE=148570234.71, RMSE=176450164.72, sMAPE=166.51%, MSLE=245.0558, P99_abs_error=367040000.00
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_dallas.parquet
    Horizon 12h Metrics: MAE=162197545.69, RMSE=189853615.88, sMAPE=181.97%, MSLE=306.0902, P99_abs_error=404669600.00
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_dallas.parquet
    Horizon 24h Metrics: MAE=123681896.17, RMSE=156026056.23, sMAPE=114.35%, MS

Processing Routers for Modeling (PatchTST):  40%|████      | 4/10 [01:59<02:57, 29.60s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: elpaso (5/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1042 hours (from 2021-10-07 14:00:00 to 2021-11-19 23:00:00)
  Validation set size: 168 hours (from 2021-11-20 00:00:00 to 2021-11-26 23:00:00)
  Test set size: 168 hours (from 2021-11-27 00:00:00 to 2021-12-03 23:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.11it/s, v_num=1.06e+7, train_loss_step=0.596]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 593.17it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.80it/s, v_num=1.06e+7, train_loss_step=0.847, train_loss_epoch=0.

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 96: 100%|██████████| 1/1 [00:00<00:00, 23.82it/s, v_num=1.06e+7, train_loss_step=0.183, train_loss_epoch=0.176]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 607.43it/s][A
Epoch 97: 100%|██████████| 1/1 [00:00<00:00, 23.84it/s, v_num=1.06e+7, train_loss_step=0.178, train_loss_epoch=0.183]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 605.06it/s][A
Epoch 98: 100%|██████████| 1/1 [00:00<00:00, 23.84it/s, v_num=1.06e+7, train_loss_step=0.192, train_loss_epoch=0.178]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?i

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 8.12 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.34it/s, v_num=1.06e+7, train_loss_step=0.259]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 167.88it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.94it/s, v_num=1.06e+7, train_loss_step=0.259, valid_loss=0.232, train_loss_epoch=0.259]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  3.98it/s, v_num=1.06e+7, train_loss_step=0.259, valid_loss=0.232, train_loss_epoch=0.259]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 184.69it/s]
    PatchTST prediction done. Duration: 0.41 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_elpaso.parquet
    Horizon 1h Metrics: MAE=44029312.70, RMSE=58168506.45, sMAPE=57.98%, MSLE=19.9917, P99_abs_error=163002956.48
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_elpaso.parquet
    Horizon 6h Metrics: MAE=48520863.12, RMSE=64752421.12, sMAPE=62.54%, MSLE=7.3611, P99_abs_error=167535532.96
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_elpaso.parquet
    Horizon 12h Metrics: MAE=67207885.46, RMSE=87653787.10, sMAPE=82.96%, MSLE=20.0663, P99_abs_error=204999318.48
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_elpaso.parquet
    Horizon 24h Metrics: MAE=70075667.09, RMSE=93299190.29, sMAPE=86.78%, MSLE=4.0616, P99_a

Processing Routers for Modeling (PatchTST):  50%|█████     | 5/10 [02:17<02:08, 25.66s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: jackson (6/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1033 hours (from 2021-10-07 23:00:00 to 2021-11-19 23:00:00)
  Validation set size: 168 hours (from 2021-11-20 00:00:00 to 2021-11-26 23:00:00)
  Test set size: 168 hours (from 2021-11-27 00:00:00 to 2021-12-03 23:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.11it/s, v_num=1.06e+7, train_loss_step=1.120]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 595.27it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.78it/s, v_num=1.06e+7, train_loss_step=1.620, train_loss_epoch=1

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 17.24 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.26it/s, v_num=1.06e+7, train_loss_step=0.173]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 166.41it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.84it/s, v_num=1.06e+7, train_loss_step=0.173, valid_loss=0.755, train_loss_epoch=0.173]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.01it/s, v_num=1.06e+7, train_loss_step=0.173, valid_loss=0.755, train_loss_epoch=0.173]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 191.15it/s]
    PatchTST prediction done. Duration: 0.41 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_jackson.parquet
    Horizon 1h Metrics: MAE=70521274.07, RMSE=96845934.25, sMAPE=127.65%, MSLE=45.6559, P99_abs_error=282968188.16
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_jackson.parquet
    Horizon 6h Metrics: MAE=70856269.38, RMSE=100220729.42, sMAPE=153.43%, MSLE=104.7671, P99_abs_error=303842017.92
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_jackson.parquet
    Horizon 12h Metrics: MAE=65426702.43, RMSE=91074938.72, sMAPE=147.91%, MSLE=82.8680, P99_abs_error=284299073.92
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_jackson.parquet
    Horizon 24h Metrics: MAE=50698275.93, RMSE=62803912.71, sMAPE=167.15%, MSLE=124.5668, P99_abs_error=142348168.00


[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 14 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.06it/s, v_num=1.06e+7, train_loss_step=1.060]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 590.75it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.79it/s, v_num=1.06e+7, train_loss_step=1.510, train_loss_epoch=1.060]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 590.17it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.83it/s, v_num=1.06e+7, train_loss_step=0.771, train_loss_epoch=1.510]
Validation: |          | 0/? [00:00<

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.30it/s, v_num=1.06e+7, train_loss_step=0.591]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 167.00it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.88it/s, v_num=1.06e+7, train_loss_step=0.591, valid_loss=0.802, train_loss_epoch=0.591]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.32it/s, v_num=1.06e+7, train_loss_step=0.591, valid_loss=0.802, train_loss_epoch=0.591]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 186.25it/s]
    PatchTST prediction done. Duration: 0.39 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_jackson.parquet
    Horizon 1h Metrics: MAE=72895294.47, RMSE=113171651.51, sMAPE=134.46%, MSLE=75.1323, P99_abs_error=364258587.20
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_jackson.parquet
    Horizon 6h Metrics: MAE=57523014.15, RMSE=87711569.68, sMAPE=144.65%, MSLE=104.0557, P99_abs_error=328359260.80
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_jackson.parquet
    Horizon 12h Metrics: MAE=51258750.08, RMSE=69070526.53, sMAPE=147.70%, MSLE=103.1238, P99_abs_error=163039103.68
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_jackson.parquet
    Horizon 24h Metrics: MAE=48773592.75, RMSE=76323616.41, sMAPE=152.59%, MSLE=1

Processing Routers for Modeling (PatchTST):  60%|██████    | 6/10 [02:50<01:51, 27.90s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: jacksonville (7/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1002 hours (from 2021-10-07 23:00:00 to 2021-11-18 16:00:00)
  Validation set size: 168 hours (from 2021-11-18 17:00:00 to 2021-11-25 16:00:00)
  Test set size: 168 hours (from 2021-11-25 17:00:00 to 2021-12-02 16:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.05it/s, v_num=1.06e+7, train_loss_step=0.722]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 597.48it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.75it/s, v_num=1.06e+7, train_loss_step=1.150, train_loss_ep

`Trainer.fit` stopped: `max_steps=400` reached.


· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 399: 100%|██████████| 1/1 [00:00<00:00,  4.30it/s, v_num=1.06e+7, train_loss_step=0.143, train_loss_epoch=0.143]
    PatchTST fitted. Duration: 20.64 seconds.


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.18it/s, v_num=1.06e+7, train_loss_step=0.143]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 163.41it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.76it/s, v_num=1.06e+7, train_loss_step=0.323, valid_loss=0.465, train_loss_epoch=0.143]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 168.23it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.81it/s, v_num=1.06e+7, train_loss_step=0.280, valid_loss=0.449, train_loss_epoch=0.323]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 181.90it/s]
    PatchTST prediction done. Duration: 1.23 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_jacksonville.parquet
    Horizon 1h Metrics: MAE=213363380.52, RMSE=324841698.41, sMAPE=41.22%, MSLE=0.3554, P99_abs_error=1212962802.56
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_jacksonville.parquet
    Horizon 6h Metrics: MAE=247203937.93, RMSE=344849235.50, sMAPE=48.13%, MSLE=0.4204, P99_abs_error=1132677788.80
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_jacksonville.parquet
    Horizon 12h Metrics: MAE=281130304.39, RMSE=395661902.69, sMAPE=56.38%, MSLE=0.6890, P99_abs_error=1194230449.28
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_jacksonville.parquet
    Horizon 24h Metrics: MAE=293843122.15, RMSE=374624539.49, sMAPE=75.66%, MSLE=6.0841, P99_abs_erro

[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 12 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.07it/s, v_num=1.06e+7, train_loss_step=0.686]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 587.03it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.76it/s, v_num=1.06e+7, train_loss_step=1.150, train_loss_epoch=0.686]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 591.91it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.81it/s, v_num=1.06e+7, train_loss_step=0.583, train_loss_epoch=1.150]
Validation: |          | 0/? [00:00<

`Trainer.fit` stopped: `max_steps=400` reached.


· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 399: 100%|██████████| 1/1 [00:00<00:00,  4.17it/s, v_num=1.06e+7, train_loss_step=0.130, train_loss_epoch=0.130]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 20.49 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.33it/s, v_num=1.06e+7, train_loss_step=0.334]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 166.64it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.83it/s, v_num=1.06e+7, train_loss_step=0.396, valid_loss=0.460, train_loss_epoch=0.334]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 170.51it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.72it/s, v_num=1.06e+7, train_loss_step=0.321, valid_loss=0.444, train_loss_epoch=0.396]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Val

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 192.13it/s]
    PatchTST prediction done. Duration: 1.18 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_jacksonville.parquet
    Horizon 1h Metrics: MAE=200008027.03, RMSE=282216530.27, sMAPE=38.56%, MSLE=0.2639, P99_abs_error=839534584.32
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_jacksonville.parquet
    Horizon 6h Metrics: MAE=261252051.70, RMSE=353656326.91, sMAPE=52.38%, MSLE=0.4798, P99_abs_error=1101110648.96
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_jacksonville.parquet
    Horizon 12h Metrics: MAE=275044973.96, RMSE=373494518.23, sMAPE=59.21%, MSLE=0.8499, P99_abs_error=1116331641.60
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_jacksonville.parquet
    Horizon 24h Metrics: MAE=304937673.54, RMSE=374345883.87, 

Processing Routers for Modeling (PatchTST):  70%|███████   | 7/10 [03:34<01:39, 33.26s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: louisville (8/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1130 hours (from 2021-09-30 15:00:00 to 2021-11-16 16:00:00)
  Validation set size: 168 hours (from 2021-11-16 17:00:00 to 2021-11-23 16:00:00)
  Test set size: 168 hours (from 2021-11-23 17:00:00 to 2021-11-30 16:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.11it/s, v_num=1.06e+7, train_loss_step=0.0269]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 590.66it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.79it/s, v_num=1.06e+7, train_loss_step=0.0329, train_loss_ep

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 342: 100%|██████████| 1/1 [00:00<00:00, 23.85it/s, v_num=1.06e+7, train_loss_step=0.00331, train_loss_epoch=0.00332]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 586.86it/s][A
Epoch 343: 100%|██████████| 1/1 [00:00<00:00, 23.83it/s, v_num=1.06e+7, train_loss_step=0.00324, train_loss_epoch=0.00331]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 589.25it/s][A
Epoch 344: 100%|██████████| 1/1 [00:00<00:00, 23.80it/s, v_num=1.06e+7, train_loss_step=0.00336, train_loss_epoch=0.00324]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0

`Trainer.fit` stopped: `max_steps=400` reached.


· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 399: 100%|██████████| 1/1 [00:00<00:00,  3.83it/s, v_num=1.06e+7, train_loss_step=0.00306, train_loss_epoch=0.00306]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev


    PatchTST fitted. Duration: 20.51 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.35it/s, v_num=1.06e+7, train_loss_step=0.0197]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 167.30it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.82it/s, v_num=1.06e+7, train_loss_step=0.0187, valid_loss=0.0234, train_loss_epoch=0.0197]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 167.64it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.82it/s, v_num=1.06e+7, train_loss_step=0.0165, valid_loss=0.0197, train_loss_epoch=0.0187]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 184.40it/s]
    PatchTST prediction done. Duration: 1.38 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_louisville.parquet
    Horizon 1h Metrics: MAE=36350929.27, RMSE=47613774.77, sMAPE=40.47%, MSLE=0.3000, P99_abs_error=149122400.96
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_louisville.parquet
    Horizon 6h Metrics: MAE=46295613.59, RMSE=81556787.74, sMAPE=48.02%, MSLE=3.5371, P99_abs_error=236145039.04
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_louisville.parquet
    Horizon 12h Metrics: MAE=54209639.41, RMSE=98982250.86, sMAPE=49.19%, MSLE=7.8758, P99_abs_error=425804393.28
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_louisville.parquet
    Horizon 24h Metrics: MAE=57525729.85, RMSE=97783484.67, sMAPE=52.65%, MSLE

Processing Routers for Modeling (PatchTST):  80%|████████  | 8/10 [04:05<01:05, 32.50s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: phoenix (9/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 1033 hours (from 2021-10-07 23:00:00 to 2021-11-19 23:00:00)
  Validation set size: 168 hours (from 2021-11-20 00:00:00 to 2021-11-26 23:00:00)
  Test set size: 168 hours (from 2021-11-27 00:00:00 to 2021-12-03 23:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.07it/s, v_num=1.06e+7, train_loss_step=0.577]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 584.82it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.73it/s, v_num=1.06e+7, train_loss_step=0.923, train_loss_epoch=0

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 9.67 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.29it/s, v_num=1.06e+7, train_loss_step=0.133]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 166.52it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.79it/s, v_num=1.06e+7, train_loss_step=0.133, valid_loss=0.163, train_loss_epoch=0.133]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.01it/s, v_num=1.06e+7, train_loss_step=0.133, valid_loss=0.163, train_loss_epoch=0.133]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 195.96it/s]
    PatchTST prediction done. Duration: 0.42 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_phoenix.parquet
    Horizon 1h Metrics: MAE=35666824.19, RMSE=44090359.60, sMAPE=63.21%, MSLE=12.7762, P99_abs_error=114249871.36
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_phoenix.parquet
    Horizon 6h Metrics: MAE=43567367.28, RMSE=57223382.69, sMAPE=66.59%, MSLE=1.9723, P99_abs_error=152784572.48
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_phoenix.parquet
    Horizon 12h Metrics: MAE=57013869.94, RMSE=67023750.10, sMAPE=85.69%, MSLE=5.3779, P99_abs_error=144233185.60
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_phoenix.parquet
    Horizon 24h Metrics: MAE=45186612.28, RMSE=60110614.58, sMAPE=81.10%, MSLE=3.6629, P99_abs_error=172140014.40


[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 14 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 22.99it/s, v_num=1.06e+7, train_loss_step=0.512]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 589.75it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.77it/s, v_num=1.06e+7, train_loss_step=0.798, train_loss_epoch=0.512]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 597.91it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.81it/s, v_num=1.06e+7, train_loss_step=0.416, train_loss_epoch=0.798]
Validation: |          | 0/? [00:00<

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev


    PatchTST fitted. Duration: 10.44 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.28it/s, v_num=1.06e+7, train_loss_step=0.216]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 165.67it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.77it/s, v_num=1.06e+7, train_loss_step=0.216, valid_loss=0.177, train_loss_epoch=0.216]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  3.92it/s, v_num=1.06e+7, train_loss_step=0.216, valid_loss=0.177, train_loss_epoch=0.216]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 184.51it/s]
    PatchTST prediction done. Duration: 0.42 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_phoenix.parquet
    Horizon 1h Metrics: MAE=61173454.39, RMSE=86058152.52, sMAPE=71.28%, MSLE=1.1945, P99_abs_error=286923995.20
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_phoenix.parquet
    Horizon 6h Metrics: MAE=69472654.06, RMSE=93911811.27, sMAPE=96.33%, MSLE=27.3230, P99_abs_error=272534198.40
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_phoenix.parquet
    Horizon 12h Metrics: MAE=82319376.86, RMSE=105615701.36, sMAPE=100.37%, MSLE=6.1452, P99_abs_error=277671839.68
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_phoenix.parquet
    Horizon 24h Metrics: MAE=60902067.42, RMSE=85077015.21, sMAPE=89.76%, MSLE=5.0799, 

Processing Routers for Modeling (PatchTST):  90%|█████████ | 9/10 [04:26<00:29, 29.13s/it][rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers


==== Starting Model Training & Evaluation for Router: reno (10/10) ====

  --- Splitting Data into Train/Validation/Test Sets ---
  Train set size: 525 hours (from 2021-10-07 23:00:00 to 2021-10-29 19:00:00)
  Validation set size: 168 hours (from 2021-10-29 20:00:00 to 2021-11-05 19:00:00)
  Test set size: 168 hours (from 2021-11-05 20:00:00 to 2021-11-12 19:00:00)

  --- Scaling Time Series Data ---
  Data scaled successfully using StandardScaler.

    --- Running NeuralForecast Experiment: PatchTST_raw (PatchTST) ---
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 22.92it/s, v_num=1.06e+7, train_loss_step=0.336]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 582.62it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.70it/s, v_num=1.06e+7, train_loss_step=0.620, train_loss_epoch=0.33

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.28it/s, v_num=1.06e+7, train_loss_step=0.0288]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 165.88it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.83it/s, v_num=1.06e+7, train_loss_step=0.0288, valid_loss=0.170, train_loss_epoch=0.0288]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_raw.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.32it/s, v_num=1.06e+7, train_loss_step=0.0288, valid_loss=0.170, train_loss_epoch=0.0288]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 182.00it/s]
    PatchTST prediction done. Duration: 0.39 seconds.
      Saved predictions for PatchTST_raw H1 to PatchTST_PatchTST_raw_H1_predictions_reno.parquet
    Horizon 1h Metrics: MAE=163215582.01, RMSE=212147067.01, sMAPE=149.71%, MSLE=120.2195, P99_abs_error=525559769.92
      Saved predictions for PatchTST_raw H6 to PatchTST_PatchTST_raw_H6_predictions_reno.parquet
    Horizon 6h Metrics: MAE=196220749.89, RMSE=319056261.20, sMAPE=163.25%, MSLE=159.9804, P99_abs_error=1062169867.20
      Saved predictions for PatchTST_raw H12 to PatchTST_PatchTST_raw_H12_predictions_reno.parquet
    Horizon 12h Metrics: MAE=193663175.15, RMSE=308272516.38, sMAPE=163.47%, MSLE=157.8060, P99_abs_error=1021527264.96
      Saved predictions for PatchTST_raw H24 to PatchTST_PatchTST_raw_H24_predictions_reno.parquet
    Horizon 24h Metrics: MAE=149993479.87, RMSE=263643097.91, sMAPE=149.80%, MSLE=153.2857, P99_abs_error=874250880.96


[rank: 0] Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.



    --- Running NeuralForecast Experiment: PatchTST_mask_out (PatchTST) ---
    Masked out 9 anomalous periods for training.
    Fitting PatchTST...
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.01it/s, v_num=1.06e+7, train_loss_step=0.233]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 589.34it/s][A
Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 23.76it/s, v_num=1.06e+7, train_loss_step=0.414, train_loss_epoch=0.233]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 588.92it/s][A
Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.78it/s, v_num=1.06e+7, train_loss_step=0.206, train_loss_epoch=0.414]
Validation: |          | 0/? [00:00<?

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type              | Params | Mode 
-----------------------------------------------------------
0 | loss         | MAE               | 0      | train
1 | padder_train | ConstantPad1d     | 0      | train
2 | scaler       | TemporalNorm      | 0      | train
3 | model        | PatchTST_backbone | 534 K  | train
-----------------------------------------------------------
534 K     Trainable params
3         Non-trainable params
534 K     Total params
2.136     Total estimated model params size (MB)
90        Modules in train mode
0         Modules in ev

    PatchTST fitted. Duration: 5.71 seconds.
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 23.34it/s, v_num=1.06e+7, train_loss_step=0.0424]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 166.03it/s][A
Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 17.87it/s, v_num=1.06e+7, train_loss_step=0.0424, valid_loss=0.230, train_loss_epoch=0.0424]· loss curve saved → outputs/ch4/loss_curves/loss_PatchTST_PatchTST_mask_out.png
Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  4.08it/s, v_num=1.06e+7, train_loss_step=0.0424, valid_loss=0.230, train_loss_epoch=0.0424]


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 183.58it/s]
    PatchTST prediction done. Duration: 0.41 seconds.
      Saved predictions for PatchTST_mask_out H1 to PatchTST_PatchTST_mask_out_H1_predictions_reno.parquet
    Horizon 1h Metrics: MAE=256185397.78, RMSE=449011480.96, sMAPE=172.86%, MSLE=175.1591, P99_abs_error=1457552359.68
      Saved predictions for PatchTST_mask_out H6 to PatchTST_PatchTST_mask_out_H6_predictions_reno.parquet
    Horizon 6h Metrics: MAE=168596820.09, RMSE=219448829.73, sMAPE=154.50%, MSLE=127.8566, P99_abs_error=511942534.08
      Saved predictions for PatchTST_mask_out H12 to PatchTST_PatchTST_mask_out_H12_predictions_reno.parquet
    Horizon 12h Metrics: MAE=165058278.58, RMSE=215773287.93, sMAPE=151.66%, MSLE=121.8140, P99_abs_error=515290513.60
      Saved predictions for PatchTST_mask_out H24 to PatchTST_PatchTST_mask_out_H24_predictions_reno.parquet
    Horizon 24h Metrics: MAE=193001399.28, RMSE=226175599.96, sMAPE=165.79%, MSLE=140.

Processing Routers for Modeling (PatchTST): 100%|██████████| 10/10 [04:40<00:00, 28.00s/it]


Removed 0 duplicate result rows.

All model results (including PatchTST) saved to: /home/ramamurthy/mhnarfth/network_analysis/individually process kora/outputs/ch4/all_model_results.csv

--- Notebook 3 Complete: GPU-Friendly Model Training & Evaluation (PatchTST) ---





In [5]:
final_results_df.to_csv(METRICS_FILE_PATH, index=False)

NameError: name 'final_results_df' is not defined