In [9]:
# @title Libraries
import torch
import os
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,GraphConv,GAT
from torch_geometric.nn import global_mean_pool
from tqdm import tqdm

## Creating Data

In [10]:
dataset = TUDataset(root='data/', name='MUTAG')

In [20]:
print(f'graphs: {len(dataset)}, features: {dataset.num_features}, classes: {dataset.num_classes}')


graphs: 188, features: 7, classes: 2


In [12]:
dataset = dataset.shuffle()
train, test = dataset[:140], dataset[140:]

In [13]:
train_loader = DataLoader(train, batch_size=50, shuffle=True)
test_loader = DataLoader(test, batch_size=50, shuffle=False)

##  Model

In [21]:
class MyModel(torch.nn.Module):
  def __init__(self, hidden_channels,conv1,conv2,conv3):
    super(MyModel, self).__init__()
    torch.manual_seed(2024)
    self.conv1 = conv1
    self.conv2 = conv2
    self.conv3 = conv3
    self.lin = Linear(hidden_channels, dataset.num_classes)

  def forward(self, x, edge_index, batch):
    x = self.conv1(x, edge_index)
    x = F.relu(x)  # Fixed: Apply relu activation function
    x = self.conv2(x, edge_index)
    x = F.relu(x)  # Fixed: Apply relu activation function
    x = self.conv3(x, edge_index)
    x = global_mean_pool(x, batch)
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.lin(x)
    return x

In [23]:
class ModelHandler():
  def __init__(self, model, epochs, learning_rate=0.01):
    self.model = model
    self.epochs = epochs
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
    self.criterion = torch.nn.CrossEntropyLoss()


  def train(self, loader):
    with tqdm(range(self.epochs), unit='epoch') as tepochs:
      tepochs.set_description('Training')
      for _ in tepochs:
        self.model.train()
        for data in loader: # Iterate in batches over the training dataset.
          out = self.model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
          loss = self.criterion(out, data.y) # Compute the loss.
          loss.backward() # Derive gradients.
          self.optimizer.step() # Update parameters based on gradients.
          self.optimizer.zero_grad() # Clear gradients.


  def test(self, loader):
    self.model.eval()
    correct = 0
    for data in loader: # Iterate in batches over the training/test dataset.
      out = self.model(data.x, data.edge_index, data.batch)
      pred = out.argmax(dim=1) # Use the class with highest probability.
      correct += int((pred == data.y).sum()) # Check against ground-truth labels
    return correct / len(loader.dataset) # Derive ratio of correct predictions.

## Using GCNConv Layer

In [24]:
model = MyModel(hidden_channels=64,
                conv1=GCNConv(dataset.num_node_features, 64),
                conv2=GCNConv(64, 64),
                conv3=GCNConv(64, 64))
print(model)

handler = ModelHandler(model, 170)

handler.train(train_loader)
print('\n')
print(handler.test(train_loader))
print(handler.test(test_loader))

MyModel(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:03<00:00, 52.85epoch/s]



0.8142857142857143
0.75





## Using GraphConv Layer

In [25]:
model = MyModel(hidden_channels=64,
                conv1=GraphConv(dataset.num_node_features, 64),
                conv2=GraphConv(64, 64),
                conv3=GraphConv(64, 64))
print(model)

handler = ModelHandler(model, 170)

handler.train(train_loader)
print('\n')
print(handler.test(train_loader))
print(handler.test(test_loader))

MyModel(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:03<00:00, 53.65epoch/s]



0.9214285714285714
0.875





## Using GAT Layer

In [26]:
model = MyModel(hidden_channels=64,
                conv1=GAT(dataset.num_node_features, 64, 3),
                conv2=GAT(64, 64, 3),
                conv3=GAT(64, 64, 3))
print(model)

handler = ModelHandler(model, 170)

handler.train(train_loader)
print('\n')
print(handler.test(train_loader))
print(handler.test(test_loader))

MyModel(
  (conv1): GAT(7, 64, num_layers=3)
  (conv2): GAT(64, 64, num_layers=3)
  (conv3): GAT(64, 64, num_layers=3)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


Training: 100%|██████████| 170/170 [00:10<00:00, 16.80epoch/s]



0.7714285714285715
0.6875






## Minibatch

In order to analyze complex networks, we can construct ego-networks for each node in the graph. An ego-network is a subgraph that includes the focal node and its neighboring nodes up to a certain depth. This sampling procedure is inspired by the neighborhood sampling method proposed by Hamilton et al. (2017a). 

To create an ego-network, we start with a focal node v ∈ V and sample all nodes within a distance of at most d from v. The sampling is performed at each level, with replacement of a fixed number of neighbors. This ensures that the resulting ego-networks have equal size. 

By generating multiple ego-networks, we can create a minibatch of size b. This allows us to analyze and process multiple ego-networks simultaneously, which is particularly useful for large-scale network analysis.
