## **Pytorch-Geometric Benchmarks**
- More deeply implements about default benchmark graph datasets of the pytorch-geometric

In [36]:
import torch
import numpy as np
from torch_geometric.data import Data

## **Karate-Club**
- Zachary’s karate club network from the “An Information Flow Model for Conflict and Fission in Small Groups” paper.
- The graph containing **34 nodes**, connected by **154 (undirected and unweighted) edges**.
- Every node is labeled by **one of two classes**.
- ***Node classification problem***

In [3]:
from torch_geometric.datasets import KarateClub

In [23]:
dataset_karateclub = KarateClub()
dataset_karateclub = dataset_karateclub[0]
print(dataset_karateclub)

Zachary's Karate Club
Data(edge_index=[2, 156], x=[34, 34], y=[34])


#### *Original Source Code of the Dataset*

In [20]:
import numpy as np
import networkx as nx
from torch_geometric.data import InMemoryDataset, Data

class KarateClub(InMemoryDataset):
    r"""Zachary's karate club network from the `"An Information Flow Model for
    Conflict and Fission in Small Groups"
    <http://www1.ind.ku.dk/complexLearning/zachary1977.pdf>`_ paper, containing
    34 nodes, connected by 154 (undirected and unweighted) edges.
    Every node is labeled by one of two classes.

    Args:
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
    """
    def __init__(self, transform=None):
        super(KarateClub, self).__init__('.', transform, None, None)

        G = nx.karate_club_graph() 
        print(G)
        adj = nx.to_scipy_sparse_matrix(G).tocoo()
        row = torch.from_numpy(adj.row.astype(np.int64)).to(torch.long)
        col = torch.from_numpy(adj.col.astype(np.int64)).to(torch.long)
        edge_index = torch.stack([row, col], dim=0)
        data = Data(edge_index=edge_index)
        data.num_nodes = edge_index.max().item() + 1
        data.x = torch.eye(data.num_nodes, dtype=torch.float)
        y = [0 if G.nodes[i]['club'] == 'Mr. Hi' else 1 for i in G.nodes]
        data.y = torch.tensor(y)
        self.data, self.slices = self.collate([data])

    def _download(self):
        return

    def _process(self):
        return

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)

In [30]:
dataset_karateclub = KarateClub()
dataset_karateclub = dataset_karateclub[0]
print(dataset_karateclub)

Zachary's Karate Club
Data(edge_index=[2, 156], x=[34, 34], y=[34])


#### *GNN model initialization - GCN* 

In [62]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(Net, self).__init__()
        self.hidden_dims = 16
        self.conv1 = GCNConv(num_node_features, self.hidden_dims)
        self.conv2 = GCNConv(self.hidden_dims, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(num_node_features=34, num_classes=2).to(device)
print(model)

Net(
  (conv1): GCNConv(34, 16)
  (conv2): GCNConv(16, 2)
)


#### *Prepare the model training*

In [63]:
# Initialize the training and testing masks
num_nodes = len(dataset_karateclub['x'])
num_labels = len(dataset_karateclub['y'])
print("- number of nodes : {} / number of labels : {} (two have to same)".format(num_nodes, num_labels))

- number of nodes : 34 / number of labels : 34 (two have to same)


In [64]:
num_train = 10
num_test  = num_nodes - (num_train)
print("- number of trains : {} / number of tests : {}".format(num_train, num_test))

- number of trains : 10 / number of tests : 24


In [65]:
mask_index = np.arange(num_nodes)
np.random.shuffle(mask_index)
train_mask_index = mask_index[:num_train]
test_mask_index = mask_index[num_train:]
print("- train mask index : {}".format(train_mask_index))
print("- test  mask index : {}".format(test_mask_index))

- train mask index : [19 12 30 31  7 29 32 33 21 22]
- test  mask index : [20 28 27 18 23 14  8 13  5 25  1 11  3  2  9  6 10  0 24 16  4 26 17 15]


In [66]:
train_mask = np.zeros(num_nodes)
test_mask = np.zeros(num_nodes)
train_mask[train_mask_index] = 1
test_mask[test_mask_index] = 1
train_mask = torch.tensor(train_mask, dtype=torch.bool)
test_mask = torch.tensor(test_mask, dtype=torch.bool)
print("- train mask : {}".format(train_mask))
print("- test mask  : {}".format(test_mask))

- train mask : tensor([False, False, False, False, False, False, False,  True, False, False,
        False, False,  True, False, False, False, False, False, False,  True,
        False,  True,  True, False, False, False, False, False, False,  True,
         True,  True,  True,  True])
- test mask  : tensor([ True,  True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True, False,
         True, False, False,  True,  True,  True,  True,  True,  True, False,
        False, False, False, False])


In [67]:
def init_train_test_mask(N, ratio=0.2):
    mask_idx = np.arange(N)
    num_train = int(N*ratio)
    num_test  = N - num_train
    np.random.shuffle(mask_idx)
    train_idx = mask_idx[:num_train]
    test_idx  = mask_idx[num_train:]
    train_mask, test_mask = np.zeros(N), np.zeros(N)
    train_mask[train_idx] = 1
    test_mask[test_idx] = 1
    train_mask = torch.tensor(train_mask, dtype=torch.bool)
    test_mask = torch.tensor(test_mask, dtype=torch.bool)
    print("- number of trains : {} / number of tests : {}".format(num_train, num_test))
    return train_mask, test_mask

train_mask, test_mask = init_train_test_mask(num_nodes, ratio=0.35)

- number of trains : 11 / number of tests : 23


In [68]:
dataset_karateclub['train_mask'] = train_mask
dataset_karateclub['test_mask'] = test_mask

In [69]:
data = dataset_karateclub.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
print(data)

Data(edge_index=[2, 156], test_mask=[34], train_mask=[34], x=[34, 34], y=[34])


#### *Model training & evaluation*

In [70]:
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    if epoch % 20 == 0: 
        _, pred = model(data).max(dim=1)
        correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
        acc = correct / data.test_mask.sum().item()
        print("- Epoch {} / Loss : {} / Test acc : {:.4f}".format(epoch, loss, acc))
    loss.backward()
    optimizer.step()

- Epoch 0 / Loss : 0.6804993748664856 / Test acc : 0.5217
- Epoch 20 / Loss : 0.3717789351940155 / Test acc : 0.4783
- Epoch 40 / Loss : 0.10899209976196289 / Test acc : 0.9130
- Epoch 60 / Loss : 0.04007513448596001 / Test acc : 0.8696
- Epoch 80 / Loss : 0.03131279721856117 / Test acc : 0.9130
- Epoch 100 / Loss : 0.015178724192082882 / Test acc : 0.9130
- Epoch 120 / Loss : 0.009972074069082737 / Test acc : 0.8261
- Epoch 140 / Loss : 0.004040815401822329 / Test acc : 0.8696
- Epoch 160 / Loss : 0.008372143842279911 / Test acc : 0.9130
- Epoch 180 / Loss : 0.0061471895314753056 / Test acc : 0.8696
