In [40]:
from torch_geometric.datasets import Planetoid
from torch_geometric.datasets import FacebookPagePage
from torch.nn import Linear, CrossEntropyLoss
import torch.nn.functional as F
from torch import relu as Relu
from torch import sum as Sum
from torch.optim import Adam
from torch.nn import Module
from torch import no_grad, eye
from torch.sparse import mm
from torch_geometric.utils import to_dense_adj


In [34]:
def get_dataset(name):
    """Instantiate one of the supported datasets and print a short summary.

    Parameters
    ----------
    name : str
        ``'FacebookPagePage'`` or ``'Cora'``.

    Returns
    -------
    The dataset object created for ``name`` (a ``FacebookPagePage`` or a
    ``Planetoid`` instance). Only its first graph, ``dataset[0]``, is
    inspected for the printed summary.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """
    if name == 'FacebookPagePage':
        dataset = FacebookPagePage(root='../data/FacebookPagePage')
        print(f'Dataset: {dataset}')
    elif name == 'Cora':
        dataset = Planetoid(root='../data/Cora', name='Cora')
        print(f'Dataset: {dataset.name}')
    else:
        # Without this branch an unknown name fell through and crashed later
        # with an UnboundLocalError on `dataset`; fail early and clearly.
        raise ValueError(
            f"Unknown dataset name {name!r}; expected 'FacebookPagePage' or 'Cora'."
        )
    data = dataset[0]

    # Dataset-level statistics.
    print(f'Number of graphs: {len(dataset)}')
    print(f'Number of Nodes: {data.x.shape[0]}')
    print('Number of features: ', dataset.num_features)
    print('Number of classes: ', dataset.num_classes)
    print('----------------')
    # Structural properties of the first graph.
    print('Graph:')
    print(f'Are edges directed: {data.is_directed()}')
    print(f'Graph Has Isolated Nodes: {data.has_isolated_nodes()}')
    print(f'Graph has loops: {data.has_self_loops()}')

    return dataset

In [35]:
# Load both benchmark datasets; each call prints its own summary.
cora_dataset = get_dataset('Cora')
# Two separator lines between the Cora and Facebook summaries.
print('----------------', '----------------', sep='\n')
fb_dataset = get_dataset('FacebookPagePage')

Dataset: Cora
Number of graphs: 1
Number of Nodes: 2708
Number of features:  1433
Number of classes:  7
----------------
Graph:
Are edges directed: False
Graph Has Isolated Nodes: False
Graph has loops: False
----------------
----------------
Dataset: FacebookPagePage()
Number of graphs: 1
Number of Nodes: 22470
Number of features:  128
Number of classes:  4
----------------
Graph:
Are edges directed: False
Graph Has Isolated Nodes: False
Graph has loops: True


## Vanilla Neural Network Classification Model

In [32]:
class MLP(Module):
    """Two-layer perceptron that classifies nodes from their features only."""

    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.linear1 = Linear(dim_in, dim_h)    # input features -> hidden
        self.linear2 = Linear(dim_h, dim_out)   # hidden -> one score per class

    def accuracy(self, y_pred, y_true):
        """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
        hits = Sum(y_pred == y_true)
        return hits / len(y_true)

    def forward(self, x):
        """Return log-probabilities (log-softmax over dim 1) for ``x``."""
        hidden = Relu(self.linear1(x))
        return F.log_softmax(self.linear2(hidden), dim=1)

    def fit(self, data, epochs):
        """Train on ``data`` for ``epochs + 1`` full-batch Adam steps.

        ``data`` must expose ``x``, ``y``, ``train_mask`` and ``val_mask``.
        Training and validation loss/accuracy are printed every 20th epoch;
        the validation figures come from the same forward pass as the
        training step of that epoch.
        """
        criterion = CrossEntropyLoss()
        optimizer = Adam(self.parameters(), lr=0.01, weight_decay=5e-4)
        self.train()  # switch the module to training mode
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x)
            train_out, train_y = out[data.train_mask], data.y[data.train_mask]
            loss = criterion(train_out, train_y)
            acc = self.accuracy(train_out.argmax(dim=1), train_y)
            loss.backward()
            optimizer.step()
            if epoch % 20 != 0:
                continue
            val_out, val_y = out[data.val_mask], data.y[data.val_mask]
            val_loss = criterion(val_out, val_y)
            val_acc = self.accuracy(val_out.argmax(dim=1), val_y)
            print(f'Epoch {epoch}, Loss: {loss.item()}, Accuracy: {acc.item()}')
            print(f'Validation Loss: {val_loss.item()}, Validation Accuracy: {val_acc.item()}')
            print('--------------------------------------------------')

    @no_grad()
    def test(self, data):
        """Return the accuracy on ``data.test_mask`` (eval mode, no gradients)."""
        self.eval()
        predictions = self(data.x)[data.test_mask].argmax(dim=1)
        return self.accuracy(predictions, data.y[data.test_mask])

# --- MLP baseline on Cora ---
# No masks are assigned here, so fit/test use the train/val/test masks that
# the Cora data object already carries.
print('Training MLP on Cora dataset')
mlp = MLP(cora_dataset.num_features, 16, cora_dataset.num_classes)
print(mlp)
mlp.fit(cora_dataset[0], 100)
acc = mlp.test(cora_dataset[0])
print(f'\nMLP test accuracy: {acc*100:.2f}%')

# --- MLP baseline on Facebook Page-Page ---
print('Training MLP on Facebook Page-Page dataset')
mlp = MLP(fb_dataset.num_features, 16, fb_dataset.num_classes)
print(mlp)
fb_data = fb_dataset[0]
# Contiguous index ranges are assigned as the train/val/test splits.
# range() is half-open, so each split must start exactly where the previous
# one stopped; starting at 18001 / 20001 silently dropped nodes 18000 and
# 20000 from every split.
fb_data.train_mask = range(18000)
fb_data.val_mask = range(18000, 20000)
fb_data.test_mask = range(20000, 22470)
mlp.fit(fb_data, 100)
acc = mlp.test(fb_data)
print(f'\nMLP test accuracy: {acc*100:.2f}%')

Training MLP on Cora dataset
MLP(
  (linear1): Linear(in_features=1433, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=7, bias=True)
)
Epoch 0, Loss: 1.9574350118637085, Accuracy: 0.12857143580913544
Validation Loss: 2.012451171875, Validation Accuracy: 0.06599999964237213
--------------------------------------------------
Epoch 20, Loss: 0.09298466891050339, Accuracy: 1.0
Validation Loss: 1.4846652746200562, Validation Accuracy: 0.5080000162124634
--------------------------------------------------
Epoch 40, Loss: 0.010838205926120281, Accuracy: 1.0
Validation Loss: 1.5820441246032715, Validation Accuracy: 0.515999972820282
--------------------------------------------------
Epoch 60, Loss: 0.00654150266200304, Accuracy: 1.0
Validation Loss: 1.640376091003418, Validation Accuracy: 0.49000000953674316
--------------------------------------------------
Epoch 80, Loss: 0.007609022315591574, Accuracy: 1.0
Validation Loss: 1.5315110683441162, Validation Accuracy

## Vanilla Graph Neural Network

In [47]:
class VanillaGNNLayer(Module):
    """Bias-free linear projection followed by multiplication with an adjacency matrix."""

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.linear = Linear(dim_in, dim_out, bias=False)

    def forward(self, x, adjacency):
        """Return ``mm(adjacency, linear(x))``."""
        projected = self.linear(x)
        return mm(adjacency, projected)
    
class VanillaGNN(Module):
    """Two stacked ``VanillaGNNLayer``s with a ReLU in between, for node classification.

    ``fit`` and ``test`` accept the adjacency matrix explicitly. They used to
    read a module-level ``adjacency`` variable, which made results depend on
    whichever graph a previous cell had last assigned to that name.
    """

    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.gnn1 = VanillaGNNLayer(dim_in, dim_h)
        self.gnn2 = VanillaGNNLayer(dim_h, dim_out)

    def accuracy(self, y_pred, y_true):
        """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
        return Sum(y_pred == y_true) / len(y_true)

    @staticmethod
    def _adjacency_with_self_loops(data):
        """Build the dense adjacency of ``data.edge_index`` with 1 added on the diagonal."""
        # max_num_nodes keeps the matrix aligned with data.x even if the
        # highest-numbered nodes have no edges.
        adjacency = to_dense_adj(data.edge_index, max_num_nodes=data.x.shape[0])[0]
        # In-place equivalent of `adjacency += eye(n)` without allocating a
        # second n x n matrix.
        adjacency.diagonal().add_(1)
        return adjacency

    def forward(self, x, adjacency):
        """Return log-probabilities (log-softmax over dim 1) for every row of ``x``."""
        h = self.gnn1(x, adjacency)
        h = Relu(h)
        h = self.gnn2(h, adjacency)
        return F.log_softmax(h, dim=1)

    def fit(self, data, epochs, adjacency=None):
        """Train on ``data`` for ``epochs + 1`` full-batch Adam steps.

        Parameters
        ----------
        data
            Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``
            (and ``edge_index`` when ``adjacency`` is not given).
        epochs : int
            The loop runs for ``range(epochs + 1)``; metrics are printed
            every 20th epoch.
        adjacency : optional
            Matrix passed to every layer. When omitted it is built from
            ``data.edge_index`` with 1 added on the diagonal.
        """
        if adjacency is None:
            adjacency = self._adjacency_with_self_loops(data)
        criterion = CrossEntropyLoss()
        optimizer = Adam(self.parameters(), lr=0.01, weight_decay=5e-4)
        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x, adjacency)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = self.accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 20 == 0:
                # Validation figures reuse this epoch's forward pass.
                val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                val_acc = self.accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch}, Loss: {loss.item()}, Accuracy: {acc.item()}')
                print(f'Validation Loss: {val_loss.item()}, Validation Accuracy: {val_acc.item()}')
                print('--------------------------------------------------')

    @no_grad()  # evaluation needs no autograd graph (matches MLP.test)
    def test(self, data, adjacency=None):
        """Return the accuracy on ``data.test_mask`` in eval mode.

        ``adjacency`` follows the same rule as in ``fit``: when omitted it is
        built from ``data.edge_index`` with 1 added on the diagonal.
        """
        if adjacency is None:
            adjacency = self._adjacency_with_self_loops(data)
        self.eval()
        out = self(data.x, adjacency)
        acc = self.accuracy(out[data.test_mask].argmax(dim=1), data.y[data.test_mask])
        return acc



# --- Vanilla GNN on Cora ---
gnn = VanillaGNN(cora_dataset.num_features, 16, cora_dataset.num_classes)
print(gnn)
# Dense adjacency matrix of the Cora graph, with 1 added on the diagonal.
adjacency = to_dense_adj(cora_dataset[0].edge_index)[0]
adjacency += eye(len(adjacency))

gnn.fit(cora_dataset[0], 100)
acc = gnn.test(cora_dataset[0])
print(f'\nVanilla GNN test accuracy: {acc*100:.2f}%')


# --- Vanilla GNN on Facebook Page-Page ---
print('Training GNN on Facebook Page-Page dataset')
gnn = VanillaGNN(fb_dataset.num_features, 16, fb_dataset.num_classes)
print(gnn)
fb_data = fb_dataset[0]
# Contiguous index ranges are assigned as the train/val/test splits.
# range() is half-open, so each split must start exactly where the previous
# one stopped; starting at 18001 / 20001 silently dropped nodes 18000 and
# 20000 from every split.
fb_data.train_mask = range(18000)
fb_data.val_mask = range(18000, 20000)
fb_data.test_mask = range(20000, 22470)
# Rebind `adjacency` to the Facebook graph (dense, 1 added on the diagonal).
adjacency = to_dense_adj(fb_data.edge_index)[0]
adjacency += eye(len(adjacency))

gnn.fit(fb_data, 100)
acc = gnn.test(fb_data)
print(f'\nGNN test accuracy: {acc*100:.2f}%')


VanillaGNN(
  (gnn1): VanillaGNNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=False)
  )
  (gnn2): VanillaGNNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=False)
  )
)
Epoch 0, Loss: 2.0393877029418945, Accuracy: 0.18571428954601288
Validation Loss: 2.041440725326538, Validation Accuracy: 0.1599999964237213
--------------------------------------------------
Epoch 20, Loss: 0.11683237552642822, Accuracy: 0.9785714149475098
Validation Loss: 1.6797665357589722, Validation Accuracy: 0.734000027179718
--------------------------------------------------
Epoch 40, Loss: 0.015732694417238235, Accuracy: 1.0
Validation Loss: 2.2461977005004883, Validation Accuracy: 0.7260000109672546
--------------------------------------------------
Epoch 60, Loss: 0.0033421311527490616, Accuracy: 1.0
Validation Loss: 2.404268741607666, Validation Accuracy: 0.7440000176429749
--------------------------------------------------
Epoch 80, Loss: 0.0020419806241989136, Accurac