### Homework 2: Graph Neural Networks

- Author: Ludek Cizinsky (`ludek.cizinsky@epfl.ch`)

### Notebeook setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Hugging face util to download dataset
from datasets import load_dataset

# Scikit-learn
from sklearn.model_selection import train_test_split

# PyTorch
import torch
from torch import nn
# - Dataloader
from torch.utils.data import Dataset, DataLoader
# - Optimization
from torch.optim import Adam, SGD
# - Loss
from torch.nn import BCELoss, BCEWithLogitsLoss

# Custom scripts
from scripts.dataset import GraphDataset
from scripts.layers import (
    # Normal
    GraphConv,
    # Graph Sage
    GraphSAGEConv,
    MeanAggregation,
    SumAggregation,
    SqrtDegAggregation,
    MaxPoolAggregation,
    # Graph Attention
    GraphAttentionConv,
    # Pooling
    MeanPool,
    MaxPool,
)

from scripts.architecture import (
    GNN
)

from scripts.utils import (
    train,
    evaluate,
)

  from .autonotebook import tqdm as notebook_tqdm


### Load the dataset

Some important notes:

- the dataset includes collection of chemical compounds represented as graphs (details are specified below), i.e., one sample is a graph and we have a corresponding ground truth label indicating whether the compound is mutagenic or not
- On the lower level, each node has associated embedding (one hot encoding) indicating its type, same goes for the edges, **the dimensions of these embeddings are different**

#### Download the dataset from Hugging Face (HF)

In [3]:
dataset_hf = load_dataset("graphs-datasets/MUTAG")['train']

#### Train, validation and test split

In [4]:
# Parse the dataset into X and y
X, y = [], []
for s in dataset_hf:
    X.append(s)
    y.extend(s['y'])

# Split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Split the train dataset into train and validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

#### Load it using custom dataloader

In [5]:
# Define hyperparameters for the dataloader
batch_size = 30
shuffle = True

# Define custom stacking behavior for the dataloader
# def collate_fn(batch):
#     return list(zip(*batch))

# Define custom stacking behavior for the dataloader
def collate_fn(batch):
    inputs, labels = [], []
    
    for item in batch:
        input_dict, label = item
        inputs.append(input_dict)
        labels.append(label)
    
    # Convert the list of labels to a 1D tensor
    labels = torch.tensor(labels).view(-1).type(torch.FloatTensor)
    
    return inputs, labels

# Define custom datasets
train_dataset = GraphDataset(X_train, y_train)
val_dataset = GraphDataset(X_val, y_val)
test_dataset = GraphDataset(X_test, y_test)

# Define custom dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size, 
    shuffle=shuffle,
    collate_fn=collate_fn
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size, 
    shuffle=shuffle,
    collate_fn=collate_fn
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size, 
    shuffle=shuffle,
    collate_fn=collate_fn
)

### Implementation of different graph and convolution and pooling layers

See the [layers.py file](scripts/layers.py) for the implementation details.

TODOs:
- [ ] Implement the attention conv

### Custom Network Design with Node Features

In [13]:
# Define the model's architecture
num_features = len(X_train[0]['node_feat'][0])
activation = nn.LeakyReLU()
pooling = MeanPool()
aggregation = SumAggregation()
dropout = 0.1
# architecture = [
    # [GraphSAGEConv, {'in_features': num_features, 'out_features': 256, 'aggregation': aggregation, 'activation': activation}],
    # [GraphSAGEConv, {'in_features': 256, 'out_features': 128, 'aggregation': aggregation, 'activation': activation}],
# ]
# architecture = [
    # [GraphConv, {'in_features': num_features, 'out_features': 64, 'activation': activation}],
    # [GraphConv, {'in_features': 64, 'out_features': 32, 'activation': activation}],
# ]

architecture = [
    [GraphAttentionConv, {'in_features': num_features, 'out_features': 64}],
    [GraphAttentionConv, {'in_features': 64, 'out_features': 32}],
]

# Define the model
model = GNN(architecture, pooling, dropout=dropout)

model = model.double()

# Define hyperparameters for the model
lr = 8e-1
epochs = 100
criterion = BCEWithLogitsLoss(pos_weight=torch.tensor(0.7, dtype=torch.double))
# optimizer = SGD(model.parameters(), lr=lr, weight_decay=1e-5)
optimizer = Adam(model.parameters(), lr=lr)

# Train the model
train(model, train_loader, criterion, optimizer, num_epochs=epochs)

# Evaluate the model on validation dataset
evaluate(model, val_loader, criterion)

Epoch [1/100](1.37s) Loss: 11.1565 F1(macro): 0.53%
Epoch [2/100](1.22s) Loss: 8.6351 F1(macro): 0.45%
Epoch [3/100](1.28s) Loss: 6.1863 F1(macro): 0.49%
Epoch [4/100](1.32s) Loss: 4.5414 F1(macro): 0.55%
Epoch [5/100](1.24s) Loss: 4.9583 F1(macro): 0.49%
Epoch [6/100](1.25s) Loss: 3.9032 F1(macro): 0.47%
Epoch [7/100](1.25s) Loss: 2.5505 F1(macro): 0.53%
Epoch [8/100](1.22s) Loss: 2.8496 F1(macro): 0.43%
Epoch [9/100](1.26s) Loss: 2.2104 F1(macro): 0.55%
Epoch [10/100](1.22s) Loss: 2.6149 F1(macro): 0.43%
Epoch [11/100](1.2s) Loss: 2.3611 F1(macro): 0.55%
Epoch [12/100](1.43s) Loss: 2.3070 F1(macro): 0.48%
Epoch [13/100](1.25s) Loss: 2.2659 F1(macro): 0.41%
Epoch [14/100](1.35s) Loss: 2.7244 F1(macro): 0.38%
Epoch [15/100](1.24s) Loss: 2.3376 F1(macro): 0.47%
Epoch [16/100](1.22s) Loss: 2.1587 F1(macro): 0.43%
Epoch [17/100](1.25s) Loss: 2.4640 F1(macro): 0.43%
Epoch [18/100](1.42s) Loss: 2.3927 F1(macro): 0.50%
Epoch [19/100](1.19s) Loss: 2.4790 F1(macro): 0.45%
Epoch [20/100](1.22s)