# GNN Explore
- exploring SimpleConv, GAT, DeepSetAggregator

In [11]:
%cd /home/ltchen/gnnpp

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
# PyTorch Lightning
import pytorch_lightning as L
import wandb

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# PyTorch geometric
import torch_geometric
import torch_geometric.data as geom_data
import torch_geometric.nn as geom_nn

from torch_geometric.nn import GATv2Conv, GCNConv
from torch_geometric.nn.aggr import MeanAggregation
from torch_geometric.utils import scatter
from torch.nn import Linear, ModuleList, ReLU
from torch_geometric.loader import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.optim import AdamW
from pytorch_lightning.loggers import WandbLogger

from models.loss import NormalCRPS
from models.model_utils import MakePositive, EmbedStations
from utils.data import *
from torch_geometric.utils import to_networkx
from utils.data import *
import matplotlib as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from utils.plot import plot_map
import networkx as nx

/home/ltchen/gnnpp


In [4]:
# Load the project dataframes for the 24h lead time and the station distance
# data derived from the "stations" entry.
dataframes = load_dataframes(mode="train", leadtime="24h")
dist = load_distances(dataframes["stations"])

# Run the project's summary-statistics step over the loaded dataframes
# (presumably collapsing the ensemble members — TODO confirm in utils.data).
dataframes = summary_statistics(dataframes)
# Each split is indexed as [0] = features, [1] = targets.
train = dataframes["train"][0]
train_target = dataframes["train"][1]
test_rf = dataframes["test_rf"][0]
test_rf_target = dataframes["test_rf"][1]
test_f = dataframes["test_f"][0]
test_f_target = dataframes["test_f"][1]

[INFO] Dataframes exist. Will load pandas dataframes.
[INFO] Loading distances from file...
[INFO] Calculating summary statistics for train
[INFO] Calculating summary statistics for test_rf
[INFO] Calculating summary statistics for test_f


In [36]:
# Display the training target frame as the notebook cell output.
train_target

Unnamed: 0,time,station_id,t2m
0,1997-01-02,0,277.75
1,1997-01-02,1,279.55
2,1997-01-02,2,276.45
3,1997-01-02,3,275.75
4,1997-01-02,4,279.35
...,...,...,...
420651,2013-12-31,117,281.35
420652,2013-12-31,118,279.35
420653,2013-12-31,119,278.25
420654,2013-12-31,120,273.15


In [3]:
# Only one ensemble member (if we don't use summary statistics):
# keep the feature rows whose `number` column equals 0.
# NOTE(review): this filters on a `number` column — confirm it still exists if
# this cell is run after `summary_statistics` has been applied.
train = dataframes["train"][0][dataframes['train'][0]['number'] == 0]
train_target = dataframes["train"][1]
test_rf = dataframes["test_rf"][0][dataframes['test_rf'][0]['number'] == 0]
test_rf_target = dataframes["test_rf"][1]
test_f = dataframes["test_f"][0][dataframes['test_f'][0]['number'] == 0]
test_f_target = dataframes["test_f"][1]
# Display the test_f targets as the cell output.
test_f_target

Unnamed: 0,time,station_id,t2m
0,2017-01-01,0,278.65
1,2017-01-01,1,275.25
2,2017-01-01,2,279.75
3,2017-01-01,3,279.15
4,2017-01-01,4,275.05
...,...,...,...
89055,2018-12-31,117,277.95
89056,2018-12-31,118,276.85
89057,2018-12-31,119,276.35
89058,2018-12-31,120,270.65


In [5]:
# Value forwarded to the graph builder as `max_dist`
# (presumably the distance threshold for creating edges; units not shown here — TODO confirm).
max_dist = 100
# Normalize the features and build the graphs. The second return value holds
# one entry per item of `valid_test_data`; it is unpacked in that order below.
graphs_train_rf, tests = normalize_features_and_create_graphs(
    training_data=(train, train_target),
    valid_test_data=[(test_rf, test_rf_target), (test_f, test_f_target)],
    mat=dist,
    max_dist=max_dist,
)

graphs_test_rf, graphs_test_f = tests

# Alias: the test_rf graphs are used as the default `graphs_test`.
graphs_test = graphs_test_rf

[INFO] Normalizing features...
[INFO] Creating graph data...


In [6]:
# Inspect the first training graph and the number of training graphs.
#print(graphs_train_rf) #(1342, 36)
print(next(iter(graphs_train_rf)))
print(len(graphs_train_rf))

Data(x=[122, 65], edge_index=[2, 1420], edge_attr=[1420, 1], y=[122], timestamp=1997-01-02 00:00:00, n_idx=[122])
3448


In [10]:
# Print the feature and target shapes of the first training graph.
print(graphs_train_rf[0].x.shape)
print(graphs_train_rf[0].y.shape)
#print(graphs_train_rf[0].x)

# drop NaNs in target? => gradients cannot be computed then

# standardize data correctly? (should be standardized using normalize_features_and_create_graphs...)
# target size: [975] vs. [975, 1]

torch.Size([122, 36])
torch.Size([122])


In [7]:
# Number of graphs per mini-batch.
batch_size = 8
# Training batches are shuffled; the test_f loader keeps the original graph order.
train_loader = DataLoader(graphs_train_rf, batch_size=batch_size, shuffle=True)
test_f_loader = DataLoader(graphs_test_f, batch_size=batch_size, shuffle=False)

## Simple GNN with Conv

In [45]:
class DeepSetAggregator(torch.nn.Module):
    """Two-stage MLP with a mean pooling step in between.

    Rows of ``x`` are passed through two linear+ReLU layers, averaged over all
    rows that share the same value in ``index``, and the pooled rows are then
    passed through one more linear+ReLU layer and a final linear output layer.

    Args:
        in_channels: Size of the last dimension of the input ``x``.
        hidden_channels: Width of the three hidden linear layers.
        out_channels: Size of the last dimension of the returned tensor.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()

        self.input = torch.nn.Linear(in_channels, hidden_channels)
        self.hidden1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.hidden2 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.output = torch.nn.Linear(hidden_channels, out_channels)
        self.relu = torch.nn.ReLU()

    def forward(self, x, index):
        """Encode rows, mean-pool them by ``index`` and decode the pooled rows.

        Args:
            x: Input tensor whose last dimension is ``in_channels``.
            index: Group id for every row of ``x`` (pooling is along dim 0).

        Returns:
            Tensor with one row per group and ``out_channels`` columns.
        """
        # Per-row encoder (before pooling).
        x = self.relu(self.input(x))
        print(f"Input: {x.shape}")
        x = self.relu(self.hidden1(x))
        print(f"Hidden1: {x.shape}")

        # Mean over all rows that share the same index value.
        x = scatter(x, index, dim=0, reduce="mean")
        print(f"scatter: {x.shape}")
        print(f"index: {index}")

        # Decoder on the pooled rows. The result of `hidden2` must be assigned
        # back to `x`; previously it was computed and thrown away, so the layer
        # had no effect on the output and received no gradient.
        x = self.relu(self.hidden2(x))
        print(f"Hidden2: {x.shape}")
        return self.output(x)

class SimpleGCN(L.LightningModule):
    """One GCNConv layer + linear head + DeepSetAggregator, trained with NormalCRPS.

    Args:
        in_features: Number of input node features.
        h_features: Hidden width of the graph convolution and of the aggregator.
        out_features: Output width of the linear head (input width of the aggregator).
        optimizer_class: Optimizer constructor, called as
            ``optimizer_class(self.parameters(), **optimizer_params)``.
        optimizer_params: Keyword arguments for ``optimizer_class``.
        num_stations: Offset multiplier used to make node indices unique
            across the graphs of a batch. Defaults to 122, the value that was
            previously hard-coded in ``forward``.
    """

    def __init__(self, in_features, h_features, out_features, optimizer_class, optimizer_params, num_stations=122):
        super().__init__()
        self.conv1 = GCNConv(in_features, h_features)
        self.out = Linear(h_features, out_features)
        self.loss = NormalCRPS()
        # The aggregator always emits 2 values per node index; they are passed
        # to the loss as `mu_sigma`.
        self.aggr = DeepSetAggregator(in_channels=out_features, hidden_channels=h_features, out_channels=2)
        self.optimizer_class = optimizer_class
        self.optimizer_params = optimizer_params
        self.num_stations = num_stations
        self.save_hyperparameters()

    def forward(self, data):
        """Run the model on a (batched) graph and return the aggregator output."""
        x, edge_index, batch_id, node_idx = data.x, data.edge_index, data.batch, data.n_idx
        # Shift the per-graph node index by the graph's position in the batch so
        # that indices are unique across the whole batch.
        node_idx = node_idx + batch_id * self.num_stations
        h = self.conv1(x, edge_index).relu()
        z = self.out(h)
        print(f"z: {z.shape}")
        print(f"Output from model - Min: {z.min()}, Max: {z.max()}, Mean: {z.mean()}")
        z = self.aggr(z, node_idx)  # aggregate rows that share a node index (mean)
        return z

    def _crps(self, batch):
        """Forward pass followed by the CRPS loss against ``batch.y``."""
        y_hat = self.forward(batch)
        return self.loss.crps(mu_sigma=y_hat, y=batch.y)

    def training_step(self, batch, batch_idx):
        loss = self._crps(batch)
        self.log('train_loss', loss, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self._crps(batch)
        # Logged under its own key; it used to be written to 'train_loss',
        # which mixed validation values into the training metric.
        self.log('val_loss', loss, on_epoch=True)
        return loss

    def test_step(self, batch, batch_idx):
        loss = self._crps(batch)
        # Same fix as in validation_step: separate key for the test metric.
        self.log('test_loss', loss, on_epoch=True)
        return loss

    def configure_optimizers(self):
        return self.optimizer_class(self.parameters(), **self.optimizer_params)

    def initialize(self, dataloader):
        """Run one validation step on the first batch of ``dataloader``."""
        batch = next(iter(dataloader))
        print(f"batch: {batch}")
        self.validation_step(batch, 0)

    def predict(self, batch, batch_idx):
        """Return the forward output for ``batch``.

        NOTE(review): this is a plain helper; the Lightning prediction hook is
        named ``predict_step`` — confirm which one callers are meant to use.
        """
        return self.forward(batch)



In [46]:
# Number of node features, read from the first training graph.
in_channels = graphs_train_rf[0].x.shape[1]

# model = SimpleGCN(in_features=in_channels, h_features=100, out_features=1, optimizer_class=AdamW, optimizer_params={"lr": 0.001})
model = SimpleGCN(in_features=in_channels, h_features=100, out_features=2, optimizer_class=torch.optim.SGD, optimizer_params={"lr": 0.001})

# Smoke test: pull two batches from the training loader and run a single
# forward pass on the first one (the model prints intermediate shapes).
#print(model)
train_iter = iter(train_loader)
batch = next(train_iter)
batch2 = next(train_iter)
print(batch.y.shape)
#print(len(batch))
model.forward(batch)
# print(model.forward(batch2))
print(batch)
print(batch2)


torch.Size([976])
z: torch.Size([976, 2])
Output from model - Min: -0.6658477783203125, Max: 2.8907713890075684, Mean: 1.0742884874343872
Input: torch.Size([976, 100])
Hidden1: torch.Size([976, 100])
scatter: torch.Size([976, 100])
index: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
         56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
         98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 1

In [43]:
# Train SimpleGCN for 30 epochs inside a wandb run and print the last training loss.
with wandb.init(
    project="gnn_explore",
    id="SimpleConv",
    #config=config,
    tags=["exploration"],
):
    in_channels = graphs_train_rf[0].x.shape[1]  #+ emb_dim - 1

    model = SimpleGCN(
        in_features=in_channels,
        h_features=100,
        out_features=1,
        optimizer_class=torch.optim.SGD,
        optimizer_params={"lr": 0.00001},  # 0.001
    )
    # Not used in this cell; kept because it rebinds the notebook-level `batch`.
    batch = next(iter(train_loader))
    wandb_logger = WandbLogger(project="gnn_explore")

    trainer = L.Trainer(
        max_epochs=30,
        log_every_n_steps=10,
        accelerator="gpu",
        enable_progress_bar=True,
        enable_model_summary=True,
        logger=wandb_logger,
    )

    trainer.fit(model=model, train_dataloaders=train_loader)

    # 'train_loss' is logged with both on_step and on_epoch, so the logger only
    # receives 'train_loss_step' / 'train_loss_epoch'. `callback_metrics` also
    # keeps the plain 'train_loss' key, so read the final value from there.
    final_loss = trainer.callback_metrics["train_loss"]
    print("Final CRPS Loss:", final_loss)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:

Epoch 0:   0%|          | 0/431 [00:00<?, ?it/s] z: torch.Size([976, 1])
Output from model - Min: -0.7958552241325378, Max: 1.5779939889907837, Mean: 0.05167920142412186


Traceback (most recent call last):
  File "/tmp/ipykernel_1055092/2216048651.py", line 21, in <module>
    trainer.fit(model=model, train_dataloaders=train_loader)
  File "/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 538, in fit
    call._call_and_handle_interrupt(
  File "/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 47, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 574, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 981, in _run
    results = self._run_stage()
  File "/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1025, in _run_stage
    self.fit_loop

Epoch 0:   0%|          | 0/431 [1:10:32<?, ?it/s]
Epoch 0:   0%|          | 0/431 [1:02:38<?, ?it/s]
Epoch 0:   0%|          | 0/431 [1:01:43<?, ?it/s]


  lambda data: self._console_raw_callback("stdout", data),


ValueError: not enough values to unpack (expected 2, got 1)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
/home/ltchen/.conda/envs/gnn_env4/lib/python3.10/site-pac

Epoch 0:   3%|▎         | 13/431 [00:00<00:22, 18.49it/s, v_num=82]

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 29: 100%|██████████| 431/431 [00:03<00:00, 109.48it/s, v_num=82]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 431/431 [00:03<00:00, 109.31it/s, v_num=82]


In [None]:
# Predict on the test_f loader and score the predictions against the test_f targets.
preds_list = []

# The targets live in `test_f_target` (the frame with the `t2m` column read
# below); `test_f` is the feature frame, so `test_f[1]` was the wrong object.
targets = test_f_target
preds = trainer.predict(model=model, dataloaders=test_f_loader)
print("test_f_target:")
print(targets)

# `trainer.predict` returns one tensor per batch; join them along the row axis.
preds = torch.cat(preds, dim=0)
# Reverse transform of the y_scaler (only on the mean)
# preds[:, 0] = torch.Tensor(y_scaler.inverse_transform(preds[:, 0].view(-1, 1))).flatten()
preds_list.append(preds)

targets = torch.Tensor(targets.t2m.values)
print("t2m values:")
print(targets)

# Average over all collected prediction tensors (only one model at the moment).
stacked = torch.stack(preds_list)
final_preds = torch.mean(stacked, dim=0)
print("final_preds")
print(final_preds)

# Score with the same call the model uses in its training/validation steps.
# NOTE(review): assumes the rows of `final_preds` line up with the rows of
# `test_f_target` — confirm against the graph construction.
res = model.loss.crps(mu_sigma=final_preds, y=targets)
print("#############################################")
print("#############################################")
print(f"final CRPS: {res.item()}")
print("#############################################")
print("#############################################")

## Simple GNN only with GATv2Conv
- GATv2Conv
- mean aggregation

In [None]:
# GAT => what difference does it make? GATv2Conv, get_attentions?
class GAT(torch.nn.Module):
    """Single GATv2Conv layer followed by a linear output layer.

    Args:
        in_features: Number of input node features.
        h_features: Output channels per attention head of the GATv2Conv layer.
        out_features: Output width of the final linear layer.
        num_heads: Number of attention heads; the head outputs feed the linear
            layer with ``h_features * num_heads`` inputs.
    """

    def __init__(self, in_features, h_features, out_features, num_heads):
        super().__init__()

        # layers
        # The keyword is `add_self_loops` (plural); it was misspelled as
        # `add_self_loop`, which GATv2Conv does not know.
        self.conv1 = GATv2Conv(
            in_features, h_features, heads=num_heads, edge_dim=1, add_self_loops=True, fill_value=0.01
        )
        self.out = Linear(h_features * num_heads, out_features)
        self.relu = nn.ReLU()
        # self.loss = ??

    def forward(self, x, edge_index, edge_attr):
        """Return the linear head's output with size-1 dimensions squeezed away."""
        # Cast to float32 (presumably the inputs may arrive in another dtype — TODO confirm).
        x = x.float()
        edge_attr = edge_attr.float()
        h = self.conv1(x, edge_index, edge_attr).relu()
        z = self.out(h).squeeze()
        return z

    @torch.no_grad()
    def get_attention(self, data):
        """Run the layer stack without gradients and also return the attention weights.

        Args:
            data: Object exposing ``x``, ``edge_index`` and ``edge_attr``.

        Returns:
            Tuple ``(x, edge_index_attention, attention_weights, attention_list)``:
            the model output (not squeezed), the edge index and attention
            weights returned by the convolution, and a one-element list holding
            those attention weights.
        """
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = x.float()
        edge_attr = edge_attr.float()

        x, (edge_index_attention, attention_weights) = self.conv1(
            x, edge_index, edge_attr, return_attention_weights=True
        )
        attention_list = [attention_weights]
        x = self.relu(x)
        # No normalisation step here: this module defines no `self.norm`, and
        # `forward` does not normalise either, so the previous `self.norm(x)`
        # call could only raise AttributeError.
        x = self.out(x)
        return x, edge_index_attention, attention_weights, attention_list


In [None]:
class LGAT(L.LightningModule):
    """Lightning wrapper: GAT -> mean aggregation per node index -> MakePositive, trained with NormalCRPS.

    Args:
        in_features: Number of input node features.
        h_features: Hidden channels per attention head.
        out_features: Output width of the GAT; it is passed to the loss as
            ``mu_sigma``.
        num_heads: Number of attention heads.
        optimizer_class: Optimizer constructor, called as
            ``optimizer_class(self.parameters(), **optimizer_params)``.
        optimizer_params: Keyword arguments for ``optimizer_class``.
        num_stations: Offset multiplier used to make node indices unique
            across the graphs of a batch. Defaults to 122, the value that was
            previously hard-coded in ``forward``.
    """

    def __init__(self, in_features, h_features, out_features, num_heads, optimizer_class, optimizer_params,
                 num_stations=122):
        super().__init__()
        # `forward` uses `self.encoder` and `self.aggr`, but neither was ever
        # created, so the first forward pass raised AttributeError.
        # The encoder is a pass-through placeholder (swap in a station embedding
        # here if one is wanted); the aggregation is a plain mean.
        self.encoder = nn.Identity()
        self.conv = GAT(in_features, h_features, out_features, num_heads)
        self.aggr = MeanAggregation()
        self.postprocess = MakePositive()
        self.loss_fn = NormalCRPS()
        self.optimizer_class = optimizer_class
        self.optimizer_params = optimizer_params
        self.num_stations = num_stations

    def forward(self, data):
        """Run encoder, GAT, aggregation and post-processing on a (batched) graph."""
        x, edge_index, edge_attr, batch_id, node_idx = data.x, data.edge_index, data.edge_attr, data.batch, data.n_idx
        # add batch_id to node_idx to get unique node indices
        node_idx = node_idx + batch_id * self.num_stations
        x = self.encoder(x)
        x = self.conv(x, edge_index, edge_attr)
        # NOTE(review): GAT.forward squeezes its output; the aggregation presumably
        # needs a 2-D input, i.e. out_features > 1 — confirm.
        x = self.aggr(x, node_idx)
        x = self.postprocess(x)
        return x

    def training_step(self, batch, batch_idx):
        y_hat = self.forward(batch)
        loss = self.loss_fn.crps(mu_sigma=y_hat, y=batch.y)
        self.log(
            "train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, batch_size=1
        )  # The batch size is not actually 1 but the loss is already averaged over the batch
        return loss

    def configure_optimizers(self):
        return self.optimizer_class(self.parameters(), **self.optimizer_params)

    def validation_step(self, batch, batch_idx):
        y_hat = self.forward(batch)
        loss = self.loss_fn.crps(mu_sigma=y_hat, y=batch.y)
        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, batch_size=1)
        return loss

    def test_step(self, batch, batch_idx, dataloader_idx=0):
        y_hat = self.forward(batch)
        loss = self.loss_fn.crps(mu_sigma=y_hat, y=batch.y)
        self.log("test_loss", loss, on_step=False, on_epoch=True, prog_bar=True, batch_size=1)
        return loss

    def initialize(self, dataloader):
        """Run one validation step on the first batch of ``dataloader``."""
        batch = next(iter(dataloader))
        self.validation_step(batch, 0)

In [None]:
# Moritz ResGNN as reference
class ResGnn(torch.nn.Module):
    """Stack of GATv2Conv layers with residual connections and a linear output layer.

    The first convolution is applied directly; every further convolution is
    added to its input (``x + relu(conv(x))``).

    Args:
        in_channels: Not used by the layers; the convolutions are created with
            ``-1`` input channels so the input width is inferred lazily.
        out_channels: Output width of the final linear layer.
        num_layers: Number of GATv2Conv layers (must be > 0).
        hidden_channels: Output channels per attention head.
        heads: Number of attention heads per layer.
    """

    def __init__(self, in_channels: int, out_channels: int, num_layers: int, hidden_channels: int, heads: int):
        super().__init__()
        assert num_layers > 0, "num_layers must be > 0."

        # Create Layers
        self.convolutions = ModuleList(
            GATv2Conv(-1, hidden_channels, heads=heads, edge_dim=1, add_self_loops=True, fill_value=0.01)
            for _ in range(num_layers)
        )
        self.lin = Linear(hidden_channels * heads, out_channels)
        self.relu = ReLU()

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor, edge_attr: torch.Tensor) -> torch.Tensor:
        """Apply all convolutions (residual from the second one on) and the linear layer."""
        x = x.float()
        edge_attr = edge_attr.float()
        for i, conv in enumerate(self.convolutions):
            if i == 0:
                # First Layer
                x = self.relu(conv(x, edge_index, edge_attr))
            else:
                x = x + self.relu(conv(x, edge_index, edge_attr))  # Residual Layers

        return self.lin(x)

    @torch.no_grad()
    def get_attention(
        self, x: torch.Tensor, edge_index: torch.Tensor, edge_attr: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, list]:
        """
        Runs a forward Pass for the given graph only though the ResGNN layer.
        NOTE: the data that is given to this method must first pass through the layers before this layer in the Graph

        :param torch.Tensor x: Tensor of Node Features (NxD)
        :param torch.Tensor edge_index: Tensor of Edges (2xE)
        :param torch.Tensor edge_attr: Edge Attributes (ExNum_Attr)
        :return x, edge_index_attention, attention_weights, attention_list: Tensor of Node Features (NxD),
        Tensor of Edges with self loops (2xE), attention of the last layer averaged over the heads,
        list with the per-head attention weights of every layer
        """
        x = x.float()
        edge_attr = edge_attr.float()

        # Pass Data though Layer to get the Attention
        attention_list = []
        # Note: edge_index_attention has to be added since we have self loops now
        edge_index_attention, attention_weights = None, None

        for i, conv in enumerate(self.convolutions):
            x_conv, (edge_index_attention, attention_weights) = conv(
                x, edge_index, edge_attr, return_attention_weights=True
            )
            attention_list.append(attention_weights)
            if i == 0:
                # First Layer. No normalisation: this class defines no
                # `self.norm` and `forward` does not normalise, so the previous
                # `self.norm(x)` call could only raise AttributeError.
                x = self.relu(x_conv)
            else:
                x = x + self.relu(x_conv)  # Residual Layers
        x = self.lin(x)

        # Head-averaged attention weights of the last layer that was run
        # (the loop variable is overwritten on every iteration).
        attention_weights = attention_weights.mean(dim=1)

        return x, edge_index_attention, attention_weights, attention_list

## GNN with DeepSetAggregator (without GAT)


## GNN with GAT and DeepSetAggregator

##