In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import sys
from pathlib import Path
sys.path.append(Path(os.getcwd()).parent.parent.as_posix())

In [3]:
import dgl
import json

import optuna
import pickle

import torch
import torch.nn as nn

from functools import partial
from itertools import product
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm
from dataset import get_datasets, ETTDataset

from utils import seed_everything
from models.gcn import GCNModel

from constructor import construct_ess, construct_vanilla, construct_complete
from graph_features import spectral_features, deepwalk_features

from train import train_step, evaluation_step

import warnings
warnings.simplefilter("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed_everything()

In [21]:
with open("./spectral_gcn_study_results.pkl", "rb") as f:
    gcn_spectral_h1 = pickle.load(f)

In [22]:
gcn_spectral_h1

{'24_complete': <optuna.study.study.Study at 0x7ed2daf78b90>,
 '24_ess': <optuna.study.study.Study at 0x7ed315b98410>,
 '24_vanilla': <optuna.study.study.Study at 0x7ed315bf2bd0>,
 '48_complete': <optuna.study.study.Study at 0x7ed315a4d450>,
 '48_ess': <optuna.study.study.Study at 0x7ed315aa3bd0>,
 '48_vanilla': <optuna.study.study.Study at 0x7ed315b022d0>,
 '168_complete': <optuna.study.study.Study at 0x7ed315964b50>,
 '168_ess': <optuna.study.study.Study at 0x7ed3159bb150>,
 '168_vanilla': <optuna.study.study.Study at 0x7ed31581dad0>,
 '336_complete': <optuna.study.study.Study at 0x7ed315874650>,
 '336_ess': <optuna.study.study.Study at 0x7ed3158d7090>,
 '336_vanilla': <optuna.study.study.Study at 0x7ed315735b50>,
 '720_complete': <optuna.study.study.Study at 0x7ed315794610>,
 '720_ess': <optuna.study.study.Study at 0x7ed3157f3090>,
 '720_vanilla': <optuna.study.study.Study at 0x7ed315659c90>}

In [7]:
params = gcn_spectral_h1["24_ess"].best_params
params

{'hidden_dim': 224,
 'num_layers': 3,
 'batch_size': 34,
 'lr': 0.0031203038451192997,
 'dropout': 0.20612435547008157}

In [8]:
DATASET_NAME = "ETTh1.csv"

# LSTF setup
LOOKBACK_SIZE = 96
HORIZON_SIZE = 24

# Graphs setup
ALPHA = 0.05
GRAPH_CONSTRUCTION_FN = partial(construct_ess, alpha=ALPHA)
GRAPH_FEATURES_FN = partial(spectral_features, embed_size=7)

# Model setup
BATCH_SIZE = params["batch_size"]
HIDDEN_DIM = params["hidden_dim"]
NUM_LAYERS = params["num_layers"]
DROPOUT = params["dropout"]
ACTIVATION_FN = nn.ReLU

# Train setup
NUM_EPOCHS = 20
LEARNING_RATE = params["num_layers"]
WEIGHT_DECAY = 1e-5
PATIENCE = 2
LR_FACTOR = 0.5

In [9]:
train_ds, val_ds, test_ds = get_datasets(
    dataset_name="ETTh1.csv",
    lookback_size=LOOKBACK_SIZE,
    horizon_size=HORIZON_SIZE
)

In [10]:
class DatasetAdapter(Dataset):
    def __init__(self, dataset: ETTDataset, graph_construction_fn, graph_features_fn=None):
        super().__init__()
        self.graphs: list[dgl.DGLGraph] = []
        self.targets: list[torch.Tensor] = []
        for idx in tqdm(range(len(dataset)), desc="Building graphs"):
            x_data, time_data, y_data = dataset[idx]
            graph = graph_construction_fn(x_data)

            if graph_features_fn:
                graph_features = graph_features_fn(graph)
                graph.ndata["h"] = torch.cat([x_data.T, graph_features], dim=1)
            else:
                graph.ndata["h"] = x_data.T
            
            graph.ndata["h"] = torch.cat([
                graph.ndata["h"],
                time_data.repeat(graph.number_of_nodes(), 1),
            ], dim=1)
            
            self.targets.append(y_data)
            self.graphs.append(graph)

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx) -> tuple[dgl.DGLGraph, torch.Tensor]:
        return self.graphs[idx], self.targets[idx]

In [11]:
def graph_collate_fn(batch):
    """
    Custom collate function for batching DGL graphs.
    :param graphs: batch of graphs and targets
    :returns: batched graph, batch of targets
    """
    graphs, targets = zip(*batch)
    targets_tensor = torch.stack(targets, dim=0)
    return dgl.batch(graphs), targets_tensor

In [12]:
train_adapter_ds = DatasetAdapter(
    dataset=train_ds,
    graph_construction_fn=GRAPH_CONSTRUCTION_FN,
    graph_features_fn=GRAPH_FEATURES_FN
)

val_adapter_ds = DatasetAdapter(
    dataset=val_ds,
    graph_construction_fn=GRAPH_CONSTRUCTION_FN,
    graph_features_fn=GRAPH_FEATURES_FN
)

test_adapter_ds = DatasetAdapter(
    dataset=test_ds,
    graph_construction_fn=GRAPH_CONSTRUCTION_FN,
    graph_features_fn=GRAPH_FEATURES_FN
)

Building graphs: 100%|██████████| 8522/8522 [01:27<00:00, 96.97it/s] 
Building graphs: 100%|██████████| 2762/2762 [00:22<00:00, 123.73it/s]
Building graphs: 100%|██████████| 2762/2762 [00:23<00:00, 116.14it/s]


In [13]:
train_loader = DataLoader(
    dataset=train_adapter_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    collate_fn=graph_collate_fn
)

val_loader = DataLoader(
    dataset=val_adapter_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    collate_fn=graph_collate_fn
)

test_loader = DataLoader(
    dataset=test_adapter_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    collate_fn=graph_collate_fn
)

In [14]:
INPUT_DIM = train_adapter_ds[0][0].ndata["h"].shape[1]
INPUT_DIM

178

In [15]:
class GraphTSModel(nn.Module):
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        num_layers: int,
        horizon_size: int,
        activation_fn: nn.Module,
        dropout: float = 0,
    ) -> "GraphTSModel":
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.activation_fn = activation_fn
        self.dropout = dropout
        self.horizon_size = horizon_size

        self.backbone = GCNModel(
            input_dim=self.input_dim,
            hidden_dim=self.hidden_dim,
            num_layers=self.num_layers,
            activation_fn=self.activation_fn,
            dropout=self.dropout
        )

        self.head = nn.Linear(self.hidden_dim, self.horizon_size)
    
    def forward(self, graph, features):
        x = features
        outputs = self.backbone(graph, x)
        tgt_emb = outputs[6::7] # extract OT's embeddings
        outputs = self.head(tgt_emb)
        return outputs

In [19]:
model = GraphTSModel(
    input_dim=INPUT_DIM,
    hidden_dim=128,
    num_layers=1,
    horizon_size=HORIZON_SIZE,
    activation_fn=ACTIVATION_FN,
    dropout=0.1
)

model = model.to(device)

loss_fn = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=4e-4,
    weight_decay=1e-5
)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.33,
    patience=PATIENCE
)

# scheduler = None

In [20]:
pbar = tqdm(range(30), desc="Training")

for epoch in pbar:
    train_loss = train_step(
        model=model,
        train_loader=train_loader,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device
    )
    val_loss = evaluation_step(
        model=model,
        loader=val_loader,
        device=device
    )
    test_loss = evaluation_step(
        model=model,
        loader=test_loader,
        device=device
    )
    
    pbar.set_postfix_str(
        # f"[train] mse = {train_output['mse']:.4f} "
        # f"[train] mae = {train_output['mae']:.4f} "
        f"[valid] mse = {val_loss['mse'] / 8:.4f} "
        f"[valid] mae = {val_loss['mae'] / 4:.4f} "
        f"[test]  mse = {test_loss['mse'] / 8:.4f} "
        f"[test]  mae = {test_loss['mae'] / 4:.4f}"
    )
    if scheduler:
        scheduler.step(val_loss["mse"])

Training: 100%|██████████| 30/30 [01:13<00:00,  2.46s/it, [valid] mse = 0.9327 [valid] mae = 0.5243 [test]  mse = 0.6383 [test]  mae = 0.4345]
