# Running on Colab

# Baseline TFT for Ontario Demand (Kaggle)
# - Target: "Ontario Demand"
# - Drops "Market Demand" entirely to avoid leakage
# - Minimal known future covariates: hour, day_of_week, month, time_idx
# - Encoder: 168h (7 days), Prediction length: 24h

# 0) Installs (safe on Kaggle)
# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line is only
# valid inside a notebook cell, not in a plain `python file.py` run.
# pytorch-forecasting is pinned to an exact version; lightning and torchmetrics are
# constrained to the version ranges given. `--no-cache-dir` skips pip's download cache.
!pip -q install "pytorch-forecasting==1.4.0" "lightning>=2.2,<2.5" "torchmetrics>=1.3,<1.5" --no-cache-dir


# ==============================
# 1) Imports and seed
# ==============================
import os, glob, math
import numpy as np
import pandas as pd
import torch

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping

from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss

# Fix the global seed (42) before any data loading or model construction so that
# repeated runs of the notebook are comparable.
# NOTE(review): workers=True presumably also seeds DataLoader worker processes —
# confirm against the Lightning `seed_everything` documentation.
pl.seed_everything(42, workers=True)

# ==============================
# 2) Locate and inspect data
# ==============================
# Dataset root; the CSV is searched for recursively below this directory.
base_dir = "/kaggle/input/iess-demand-2002-2025"

# Collect every CSV under base_dir, accepting both lower- and upper-case extensions.
# On case-insensitive filesystems both patterns match the same files, so the result
# is de-duplicated; sorting makes the size tie-break below deterministic.
csv_candidates = []
for ext in ("*.csv", "*.CSV"):
    csv_candidates.extend(glob.glob(os.path.join(base_dir, "**", ext), recursive=True))
csv_candidates = sorted(set(csv_candidates))

# Explicit raise (instead of assert) so the check survives `python -O`.
if not csv_candidates:
    raise FileNotFoundError(f"No CSV files found under {base_dir}")

# Heuristic: the largest CSV is taken to be the main data file.
csv_path = max(csv_candidates, key=os.path.getsize)
print(f"Using file: {csv_path}")

# Optional: show available inputs.
# Walk the whole input tree and print the first few files found. Stopping after the
# top-level directory would list nothing when the data sits in sub-directories
# (as base_dir itself does).
max_listed = 5
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk("/kaggle/input")
    for filename in filenames
)
for n_listed, input_file in enumerate(input_files):
    if n_listed >= max_listed:
        break
    print(input_file)

# ==============================
# 3) Load & preprocess
# ==============================
df = pd.read_csv(csv_path)
# Header cells may carry stray whitespace; normalise before any column lookup.
df.columns = [c.strip() for c in df.columns]

# Fail early, with one clear message, if any column the pipeline reads is absent
# (previously only the target was checked; a missing Date/Hour surfaced as a bare KeyError).
required_cols = ["Date", "Hour", "Ontario Demand"]
missing_cols = [c for c in required_cols if c not in df.columns]
if missing_cols:
    raise KeyError(f"Expected column(s) missing from {csv_path}: {missing_cols}")

# Drop Market Demand to avoid leakage in baseline
df = df.drop(columns=["Market Demand"], errors="ignore")

# Parse datetime from Date + Hour (Hour is 1..24).
# Unparseable values become NA/NaT and those rows are dropped.
df["Hour"] = pd.to_numeric(df["Hour"], errors="coerce").astype("Int64")
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df = df.dropna(subset=["Date", "Hour"]).copy()
# Hours outside 1..24 are clamped to the nearest bound rather than dropped.
df["Hour"] = df["Hour"].astype(int).clip(1, 24)

# Build hourly timestamp mapping 1..24 -> 0..23 per day
df["time"] = df["Date"] + pd.to_timedelta(df["Hour"] - 1, unit="h")
df["Ontario Demand"] = pd.to_numeric(df["Ontario Demand"], errors="coerce")

# Sort once with a *stable* algorithm so that, among rows sharing a timestamp,
# keep="first" deterministically retains the row that appeared first in the file.
# drop_duplicates preserves order, so no second sort is needed.
df = (
    df.dropna(subset=["Ontario Demand", "time"])
    .sort_values("time", kind="stable")
    .drop_duplicates(subset=["time"], keep="first")
    .reset_index(drop=True)
)
if df.empty:
    raise ValueError(f"No usable rows left in {csv_path} after cleaning")

# Enforce a continuous hourly grid: reindexing inserts all-NaN rows for missing hours.
full_range = pd.date_range(df["time"].min(), df["time"].max(), freq="h")  # use 'h'
df = df.set_index("time").reindex(full_range).rename_axis("time").reset_index()

# Forward-fill target gaps created by reindexing (baseline).
# Report how many hours were imputed so the fill is not silent. Only the target is
# filled; other original columns (e.g. Date, Hour) stay NaN on inserted rows.
n_gap_hours = int(df["Ontario Demand"].isna().sum())
if n_gap_hours:
    print(f"Forward-filling {n_gap_hours} missing hourly value(s) in 'Ontario Demand'")
df["Ontario Demand"] = df["Ontario Demand"].astype(float).ffill()

# Single series id
df["series"] = "ON"

# Continuous integer time index in hours
df["time_idx"] = ((df["time"] - df["time"].min()).dt.total_seconds() // 3600).astype(int)

# Minimal calendar covariates
df["hour"] = df["time"].dt.hour.astype("int16")
df["day_of_week"] = df["time"].dt.dayofweek.astype("int8")
df["month"] = df["time"].dt.month.astype("int8")

print(df.head())

# ==============================
# 4) Split and dataset
# ==============================
max_encoder_length = 168   # 7 days
max_prediction_length = 24 # next 24 hours

# Chronological split: last 7 days for validation.
# Rows with time_idx <= cutoff are used for training; later rows are held out.
cutoff = df["time_idx"].max() - max_prediction_length * 7
training_df = df[df["time_idx"] <= cutoff].copy()
validation_df = df[df["time_idx"] > cutoff].copy()
# Explicit raise (instead of assert) so the check survives `python -O`.
if training_df.empty or validation_df.empty:
    raise ValueError(
        f"Empty split: {len(training_df)} training rows, {len(validation_df)} validation rows"
    )

training = TimeSeriesDataSet(
    training_df,
    time_idx="time_idx",
    target="Ontario Demand",
    group_ids=["series"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,

    # Minimal baseline features
    time_varying_known_reals=["time_idx", "hour", "day_of_week", "month"],
    time_varying_unknown_reals=["Ontario Demand"],

    # Normalization and helpers
    target_normalizer=GroupNormalizer(groups=["series"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    # Tolerate any residual gaps (safety)
    allow_missing_timesteps=True,
)

# Validation set: every forecast window whose first predicted step lies in the
# held-out period (time_idx > cutoff). The full frame is passed so that encoder
# histories may reach back into the training period, while `min_prediction_idx`
# keeps all predicted steps out of it.
# Previously `predict=True` was used, which produces only ONE window per series
# (the final 24 h) and so ignored most of the 7 held-out days.
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    min_prediction_idx=int(cutoff) + 1,
    stop_randomization=True,
)

# ==============================
# 5) Dataloaders
# ==============================
batch_size = 128

# Settings shared by both loaders: no worker subprocesses, hence nothing to persist.
loader_opts = {"num_workers": 0, "persistent_workers": False}

# Training loader uses the base batch size; the validation loader uses twice that.
train_loader = training.to_dataloader(train=True, batch_size=batch_size, **loader_opts)
val_loader = validation.to_dataloader(train=False, batch_size=2 * batch_size, **loader_opts)

# ==============================
# 6) Model and trainer
# ==============================
# Hyperparameters for the baseline Temporal Fusion Transformer, gathered in one place.
tft_hparams = dict(
    hidden_size=32,
    hidden_continuous_size=16,
    attention_head_size=4,
    dropout=0.1,
    learning_rate=1e-3,
    optimizer="adam",
    reduce_on_plateau_patience=3,
    loss=QuantileLoss(),      # robust multi-horizon baseline
)
# Input/output sizes and variable lists are derived from the training dataset.
tft = TemporalFusionTransformer.from_dataset(training, **tft_hparams)

# Stop when the monitored "val_loss" has not decreased for 3 consecutive checks.
early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

trainer = pl.Trainer(
    accelerator="auto",
    devices="auto",
    max_epochs=5,             # quick baseline
    gradient_clip_val=0.1,
    log_every_n_steps=50,
    callbacks=[early_stop],
)

# ==============================
# 7) Train
# ==============================
trainer.fit(tft, train_dataloaders=train_loader, val_dataloaders=val_loader)

# ==============================
# 8) Validation predictions & metrics (fixed)
# ==============================
# Predicted 24-step horizons for each validation window (median for QuantileLoss)
# Point forecasts for each validation window (median for QuantileLoss);
# expected shape [N_windows, horizon] for a single target.
preds = tft.predict(val_loader)

# Collect actual targets from val_loader by unpacking y=(target, weight).
# The loader is iterated in the same order as by predict(), so rows line up.
actuals_list = []
for _, y in val_loader:
    # y is (target, weight). target may be a list if multiple targets.
    target = y[0] if isinstance(y, (list, tuple)) else y
    if isinstance(target, (list, tuple)):  # multi-target case: use the first target
        target = target[0]
    actuals_list.append(target)

actuals = torch.cat([t.detach().cpu().float() for t in actuals_list], dim=0)

# Convert preds to CPU float
preds = preds.detach().cpu().float()

# Guard against silent broadcasting: element-wise metrics are only meaningful
# when predictions and targets have exactly the same shape.
if preds.shape != actuals.shape:
    raise ValueError(
        f"Prediction/target shape mismatch: {tuple(preds.shape)} vs {tuple(actuals.shape)}"
    )

# Element-wise error terms, computed once and reused for overall and per-horizon metrics.
abs_err = torch.abs(actuals - preds)
sq_err = (actuals - preds) ** 2
# Symmetric percentage error per element; 1e-6 avoids division by zero.
smape_terms = 200.0 * abs_err / (torch.abs(actuals) + torch.abs(preds) + 1e-6)

# Overall metrics
mae = abs_err.mean().item()
rmse = torch.sqrt(sq_err.mean()).item()
smape = smape_terms.mean().item()
print(f"Validation MAE:  {mae:.2f}")
print(f"Validation RMSE: {rmse:.2f}")
print(f"Validation sMAPE: {smape:.2f}%")

# Per-horizon metrics (1..24): average over windows (dim 0), keep the horizon axis.
per_h_mae = abs_err.mean(dim=0).numpy()
per_h_rmse = torch.sqrt(sq_err.mean(dim=0)).numpy()
per_h_smape = smape_terms.mean(dim=0).numpy()

# Make sure the output directory exists before writing (it is not guaranteed
# outside a Kaggle session), so to_csv does not fail on a missing directory.
out_dir = "/kaggle/working"
os.makedirs(out_dir, exist_ok=True)

per_h_df = pd.DataFrame({
    "horizon_hour_ahead": np.arange(1, preds.shape[1] + 1),
    "MAE": per_h_mae,
    "RMSE": per_h_rmse,
    "sMAPE_%": per_h_smape,
})
print(per_h_df.head(10))
per_h_df.to_csv(os.path.join(out_dir, "validation_per_horizon_metrics.csv"), index=False)

# Optional: inspect first validation window
sample_idx = 0
inspect_df = pd.DataFrame({
    "horizon_hour_ahead": np.arange(1, preds.shape[1] + 1),
    "pred_p50": preds[sample_idx].numpy(),
    "actual": actuals[sample_idx].numpy(),
})
print(inspect_df.head(12))
inspect_df.to_csv(os.path.join(out_dir, "sample_validation_window.csv"), index=False)

print("Artifacts saved to /kaggle/working/: validation_per_horizon_metrics.csv, sample_validation_window.csv")
