In [1]:
# @title Libraries
import torch
import os
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch.nn import Linear
import torch.nn.functional as functional
from torch_geometric.nn import GCNConv,GraphConv,GAT
from torch_geometric.nn import global_mean_pool
from tqdm import tqdm

ModuleNotFoundError: No module named 'torch'

## Creating Data

In [2]:
# Load the MUTAG dataset through TUDataset, using data/ as the root directory.
# The cell output shows the archive being downloaded and processed on first use.
dataset = TUDataset(root='data/', name='MUTAG')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


In [3]:
# Sanity-check the loaded dataset: how many graphs it holds, the feature count
# reported by `num_features`, and the number of target classes.
print(f'# graphs: {len(dataset)}')
print(f'# features: {dataset.num_features}')
print(f'# classes: {dataset.num_classes}')

# graphs: 188
# features: 7
# classes: 2


In [24]:
# Seed torch's global RNG so the shuffle below, and therefore the train/test
# split, is the same on every run (same seed value MyModel uses).
torch.manual_seed(2024)
# Shuffle into a new name instead of rebinding `dataset`: re-running this cell
# then always starts from the original ordering and reproduces the same split.
shuffled_dataset = dataset.shuffle()
# First 140 graphs for training, the remainder for evaluation.
train, test = shuffled_dataset[:140], shuffled_dataset[140:]

In [25]:
# Mini-batch loaders over the two splits, 50 graphs per batch.
# Only the training loader reshuffles; evaluation order stays fixed.
train_loader = DataLoader(
    dataset=train,
    batch_size=50,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test,
    batch_size=50,
    shuffle=False,
)

## Creating Model

In [6]:
class MyModel(torch.nn.Module):
  """Graph-level classifier built from three caller-supplied message-passing layers.

  The three layers produce node embeddings, which are mean-pooled per graph,
  passed through dropout and classified by a final linear layer.

  Args:
    hidden_channels: Width of the embeddings produced by `conv3`; used as the
      input size of the final linear layer.
    conv1, conv2, conv3: Modules called as `layer(x, edge_index)`.
    num_classes: Number of output classes. When omitted, falls back to the
      notebook-level `dataset.num_classes` (the original behaviour).
  """

  def __init__(self, hidden_channels, conv1, conv2, conv3, num_classes=None):
    super().__init__()
    # NOTE: the conv layers are constructed by the caller before this point, so
    # this seed only affects the linear layer's initialisation (and everything
    # that draws from torch's global RNG afterwards).
    torch.manual_seed(2024)
    self.conv1 = conv1
    self.conv2 = conv2
    self.conv3 = conv3
    if num_classes is None:
      num_classes = dataset.num_classes
    self.lin = Linear(hidden_channels, num_classes)

  def forward(self, x, edge_index, batch):
    """Return one row of class logits per graph in the batch.

    Args:
      x: Node feature matrix.
      edge_index: Graph connectivity, forwarded unchanged to each conv layer.
      batch: Assignment of every node to its graph, used by the pooling step.
    """
    # 1. Obtain node embeddings
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = self.conv2(x, edge_index)
    x = x.relu()
    x = self.conv3(x, edge_index)
    # 2. Readout layer
    x = global_mean_pool(x, batch) # [batch_size, hidden_channels]
    # 3. Apply a final classifier
    # The notebook imports torch.nn.functional as `functional`; the previous
    # `F.dropout` referenced an undefined name and raised NameError.
    x = functional.dropout(x, p=0.5, training=self.training)
    x = self.lin(x)
    return x

In [16]:
class ModelHandler:
  """Bundles a model with an Adam optimizer and cross-entropy loss for training and evaluation.

  Args:
    model: Module called as `model(data.x, data.edge_index, data.batch)`.
    epochs: Number of passes over the training loader performed by `train`.
    learning_rate: Learning rate handed to Adam.
  """

  def __init__(self, model, epochs, learning_rate=0.01):
    self.model = model
    self.epochs = epochs
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
    self.criterion = torch.nn.CrossEntropyLoss()


  def train(self, loader):
    """Train the model for `self.epochs` epochs over `loader`, showing a tqdm progress bar."""
    with tqdm(range(self.epochs), unit='epoch') as tepochs:
      tepochs.set_description('Training')
      for _ in tepochs:
        self.model.train()
        for data in loader: # Iterate in batches over the training dataset.
          # Clear gradients first so nothing left over from earlier backward
          # passes can leak into this step.
          self.optimizer.zero_grad()
          out = self.model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
          loss = self.criterion(out, data.y) # Compute the loss.
          loss.backward() # Derive gradients.
          self.optimizer.step() # Update parameters based on gradients.


  def test(self, loader):
    """Return the fraction of graphs in `loader.dataset` that the model classifies correctly."""
    self.model.eval()
    correct = 0
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
      for data in loader: # Iterate in batches over the training/test dataset.
        out = self.model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1) # Use the class with the highest score.
        correct += int((pred == data.y).sum()) # Check against ground-truth labels
    return correct / len(loader.dataset) # Derive ratio of correct predictions.

## Using GCNConv Layer

In [21]:
# Three stacked GCNConv layers: dataset.num_node_features -> 64 -> 64 -> 64.
model = MyModel(
    hidden_channels=64,
    conv1=GCNConv(in_channels=dataset.num_node_features, out_channels=64),
    conv2=GCNConv(in_channels=64, out_channels=64),
    conv3=GCNConv(in_channels=64, out_channels=64),
)
print(model)

# Train for 170 epochs with ModelHandler's default learning rate.
handler = ModelHandler(model, epochs=170)
handler.train(train_loader)
print('\n')

# Accuracy on the training split first, then on the held-out split.
for split_loader in (train_loader, test_loader):
  print(handler.test(split_loader))

MyModel(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:07<00:00, 23.50epoch/s]




0.8142857142857143
0.6666666666666666


## Using GraphConv Layer

In [22]:
# Same architecture as before, with GraphConv in each of the three layer slots.
model = MyModel(
    hidden_channels=64,
    conv1=GraphConv(in_channels=dataset.num_node_features, out_channels=64),
    conv2=GraphConv(in_channels=64, out_channels=64),
    conv3=GraphConv(in_channels=64, out_channels=64),
)
print(model)

# Train for 170 epochs with ModelHandler's default learning rate.
handler = ModelHandler(model, epochs=170)
handler.train(train_loader)
print('\n')

# Accuracy on the training split first, then on the held-out split.
for split_loader in (train_loader, test_loader):
  print(handler.test(split_loader))

MyModel(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:07<00:00, 22.36epoch/s]



0.9428571428571428
0.8541666666666666





## Using GAT Layer

In [23]:
# NOTE(review): `GAT` is constructed with num_layers=3 (see the printed repr),
# so each of the three slots holds a multi-layer GAT model rather than a single
# attention layer -- confirm this is the comparison intended against the
# single-layer GCNConv / GraphConv runs.
model = MyModel(
    hidden_channels=64,
    conv1=GAT(in_channels=dataset.num_node_features, hidden_channels=64, num_layers=3),
    conv2=GAT(in_channels=64, hidden_channels=64, num_layers=3),
    conv3=GAT(in_channels=64, hidden_channels=64, num_layers=3),
)
print(model)

# Train for 170 epochs with ModelHandler's default learning rate.
handler = ModelHandler(model, epochs=170)
handler.train(train_loader)
print('\n')

# Accuracy on the training split first, then on the held-out split.
for split_loader in (train_loader, test_loader):
  print(handler.test(split_loader))

MyModel(
  (conv1): GAT(7, 64, num_layers=3)
  (conv2): GAT(64, 64, num_layers=3)
  (conv3): GAT(64, 64, num_layers=3)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:25<00:00,  6.66epoch/s]




0.8142857142857143
0.7083333333333334


## Minibatch
We can construct an ego-network for each node in the graph and use this ego-network as the subgraph. This sampling procedure is similar to the neighbourhood sampling method proposed by Hamilton et al. (2017a). Given a node v ∈ V, its ego-network of depth d is the induced subgraph obtained from a sample of the nodes at distance at most d from v. Sampling is done at each level: a fixed number of neighbours is drawn with replacement, so that all subgraphs have equal size. To create a batch of size b, we create b such ego-networks.