# Use EUPPBench Data in GNN
1) GNN with Conv
2) GNN 1) + GATv2Conv
3) GNN 2) + DeepSetAggregation

In [2]:
from torchmetrics import MeanSquaredError
%cd /home/ltchen/gnnpp
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
# PyTorch Lightning
import pytorch_lightning as L

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# PyTorch geometric
import torch_geometric
import torch_geometric.data as geom_data
import torch_geometric.nn as geom_nn

from torch_geometric.nn import GATv2Conv
from torch_geometric.nn import GCNConv
from torch_geometric.utils import scatter
from torch.nn import Linear, ModuleList, ReLU
from torch_geometric.loader import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.optim import AdamW
from pytorch_lightning.loggers import WandbLogger

from models.loss import NormalCRPS
from models.model_utils import MakePositive, EmbedStations
from utils.data import *

/home/ltchen/gnnpp


In [3]:
dataframes = load_dataframes(mode="train", leadtime="24h")
dist = load_distances(dataframes["stations"])
max_dist = 100

graphs_train_rf, tests = normalize_features_and_create_graphs(
    training_data=dataframes["train"],
    valid_test_data=[dataframes["test_rf"], dataframes["test_f"]],
    mat=dist,
    max_dist=max_dist,
)
graphs_test_rf, graphs_test_f = tests

graphs_test = graphs_test_rf

[INFO] Dataframes exist. Will load pandas dataframes.
[INFO] Computing distances...
[INFO] Normalizing features...
[INFO] Creating graph data...


In [4]:
#print(graphs_train_rf) #(1342, 36)
print(next(iter(graphs_train_rf)))

Data(x=[1342, 36], edge_index=[2, 29040], edge_attr=[29040, 1], y=[122], timestamp=1997-01-02 00:00:00, n_idx=[1342])


In [9]:
graphs_train_rf[0].y.shape

torch.Size([122])

In [10]:
batch_size = 8
train_loader = DataLoader(graphs_train_rf, batch_size=batch_size, shuffle=True)

## Simple GNN with Conv

In [24]:
class SimpleGCN(L.LightningModule):
    def __init__(self, in_features, h_features, out_features, optimizer_class, optimizer_params):
        super().__init__()
        self.conv1 = GCNConv(in_features, h_features)
        self.out = Linear(h_features, out_features)
        self.loss = torch.nn.MSELoss()
        self.optimizer_class = optimizer_class
        self.optimizer_params = optimizer_params

    def forward(self, data):
        x, edge_index, edge_attr, batch_id = data.x, data.edge_index, data.edge_attr, data.batch
        h = self.conv1(x, edge_index).relu()
        z = self.out(h)
        return h, z

    def training_step(self, batch, batch_idx):
        y_hat = self.forward(batch)
        loss = self.loss = self.loss(y_hat, batch.y)
        return loss

    def validation_step(self, batch, batch_idx):
        y_hat = self.forward(batch)
        loss = self.loss = self.loss(y_hat, batch.y)
        return loss

    def test_step(self, batch, batch_idx):
        y_hat = self.forward(batch)
        loss = self.loss = self.loss(y_hat, batch.y)
        return loss

    def configure_optimizers(self):
        return self.optimizer_class(self.parameters(), **self.optimizer_params)

    def initialize(self, dataloader):
        batch = next(iter(dataloader))
        self.validation_step(batch, 0)

    def predict(self, batch, batch_idx):
        y_hat = self.forward(batch)
        return y_hat



In [22]:
in_channels = graphs_train_rf[0].x.shape[1] #+ emb_dim - 1
model = SimpleGCN(in_features=in_channels, h_features=100, out_features=1, optimizer_class=AdamW, optimizer_params={"lr": 0.001})
print(model)
batch = next(iter(train_loader))
model.forward(batch)

SimpleGCN(
  (conv1): GCNConv(36, 100)
  (out): Linear(in_features=100, out_features=1, bias=True)
  (loss): MSELoss()
)


(tensor([[ 0.2997,  0.2761,  0.6848,  ...,  0.0000,  0.5249,  0.0000],
         [ 0.4581,  0.8263,  2.5109,  ...,  0.0000,  1.5318,  0.0000],
         [ 0.3536,  0.5003,  0.9121,  ...,  0.0000,  0.7451,  0.0000],
         ...,
         [ 3.9907, 11.6466, 22.6003,  ...,  0.0000, 15.4302,  0.0000],
         [ 4.1053, 12.1910, 23.1547,  ...,  0.0000, 15.7059,  0.0000],
         [ 4.1972, 12.6102, 23.9195,  ...,  0.0000, 16.0886,  0.0000]],
        grad_fn=<ReluBackward0>),
 tensor([[ 0.2388],
         [-0.2182],
         [ 0.3353],
         ...,
         [-4.7022],
         [-4.7296],
         [-4.8060]], grad_fn=<AddmmBackward0>))

In [23]:
trainer = L.Trainer(max_epochs=30)
trainer.fit(model, train_loader)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type    | Params | Mode 
------------------------------------------
0 | conv1 | GCNConv | 3.7 K  | train
1 | out   | Linear  | 101    | train
2 | loss  | MSELoss | 0      | train
------------------------------------------
3.8 K     Trainable params
0         Non-trainable params
3.8 K     Total params
0.015     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 0/431 [00:00<?, ?it/s] 

AttributeError: 'tuple' object has no attribute 'size'

## Simple GNN only with GATv2Conv
- GATv2Conv
- mean aggregation

## GNN with DeepSetAggregator (without GAT)


## GNN with GAT and DeepSetAggregator

##