## Imports

In [2]:
import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import networkx as nx

from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.utils import remove_isolated_nodes, to_networkx

from Graph_Nets import GCNetwork, GANetwork

### Dataset

In [51]:
dataset = Planetoid(root=".", name="CiteSeer")
data = dataset[0]

print(f'Dataset: {dataset}')
print('-------------------')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of nodes: {data.x.shape[0]}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Print information about the Dataset
print(f'\nGraph:')
print('------')
print(f'Edges are directed: {data.is_directed()}')
print(f'Graph has isolated nodes: {data.has_isolated_nodes()}')
print(f'Graph has loops: {data.has_self_loops()}')

Dataset: CiteSeer()
-------------------
Number of graphs: 1
Number of nodes: 3327
Number of features: 3703
Number of classes: 6

Graph:
------
Edges are directed: False
Graph has isolated nodes: True
Graph has loops: False


In [52]:
isolated = (remove_isolated_nodes(data['edge_index'])[2] == False).sum(dim=0).item()
print(f'Number of isolated nodes = {isolated}')

Number of isolated nodes = 48


In [53]:
print(data)

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])


In [54]:
g = to_networkx(data)
order = sorted(list(g.nodes()))
A = nx.adjacency_matrix(g, nodelist=order)
A = A.todense()
I = np.eye(A.shape[0])
A = A + I

In [55]:
X, y, A = data.x, data.y, torch.Tensor(A)

In [56]:
X.shape, y.shape, A.shape

(torch.Size([3327, 3703]), torch.Size([3327]), torch.Size([3327, 3327]))

## Training

In [9]:
in_features = data.x.size(1)
out_features = 100
n_classes = dataset.num_classes
num_heads = 4
bias = True
alpha = 0.2

#### Graph Convolutional Network

In [21]:
GCN = GCNetwork(in_features=in_features, 
                out_features=out_features, 
                n_classes=n_classes, 
                bias=bias)

In [22]:
num_epochs = 10
lr_rate=0.01

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(GCN.parameters(), lr=lr_rate)

In [24]:
for epoch in range(num_epochs):
    GCN.train()
    preds = GCN(X, A)
    loss = loss_fn(preds[data.train_mask], data.y[data.train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    acc = float((torch.argmax(F.softmax(preds[data.train_mask], dim=1), axis=1) == data.y[data.train_mask]).sum() / data.y[data.train_mask].shape[0])

    val_loss = loss_fn(preds[data.val_mask], data.y[data.val_mask])
    val_acc = float((torch.argmax(F.softmax(preds[data.val_mask], dim=1), axis=1) == data.y[data.val_mask]).sum() / data.y[data.val_mask].shape[0])

    print(f'Epoch: {epoch} | train loss: {loss.item()} | train accuracy: {acc} | val loss: {val_loss.item()} | val accuracy {val_acc}')

Epoch: 0 | train loss: 1.7959004640579224 | train accuracy: 0.1666666716337204 | val loss: 1.8095452785491943 | val accuracy 0.06800000369548798
Epoch: 1 | train loss: 1.5769823789596558 | train accuracy: 0.875 | val loss: 1.7483713626861572 | val accuracy 0.28999999165534973
Epoch: 2 | train loss: 1.2074047327041626 | train accuracy: 1.0 | val loss: 1.6158791780471802 | val accuracy 0.6019999980926514
Epoch: 3 | train loss: 0.7897706627845764 | train accuracy: 1.0 | val loss: 1.4578791856765747 | val accuracy 0.6340000033378601
Epoch: 4 | train loss: 0.44315972924232483 | train accuracy: 1.0 | val loss: 1.3113090991973877 | val accuracy 0.6399999856948853
Epoch: 5 | train loss: 0.21667322516441345 | train accuracy: 1.0 | val loss: 1.19483482837677 | val accuracy 0.6499999761581421
Epoch: 6 | train loss: 0.09539853036403656 | train accuracy: 1.0 | val loss: 1.1201817989349365 | val accuracy 0.6480000019073486
Epoch: 7 | train loss: 0.04006306454539299 | train accuracy: 1.0 | val loss: 

#### Graph Attantion Network

In [68]:
GAN = GANetwork(in_features=in_features, 
                out_features=out_features, 
                n_classes=n_classes, 
                num_heads=num_heads, 
                alpha=alpha, 
                bias=bias)

In [69]:
num_epochs = 10
lr_rate=0.001

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(GAN.parameters(), lr=lr_rate)

In [57]:
X, A = X.unsqueeze(axis=0), A.unsqueeze(axis=0)

In [70]:
for epoch in range(num_epochs):
    preds = GAN(X, A)
    loss = loss_fn(preds.squeeze(axis=0)[data.train_mask], data.y[data.train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    acc = float((torch.argmax(F.softmax(preds.squeeze(axis=0)[data.train_mask], dim=1), axis=1) == data.y[data.train_mask]).sum() / data.y[data.train_mask].shape[0])

    val_loss = loss_fn(preds.squeeze(axis=0)[data.val_mask], data.y[data.val_mask])
    val_acc = float((torch.argmax(F.softmax(preds.squeeze(axis=0)[data.val_mask], dim=1), axis=1) == data.y[data.val_mask]).sum() / data.y[data.val_mask].shape[0])

    print(f'Epoch: {epoch} | train loss: {loss.item()} | train accuracy: {acc} | val loss: {val_loss.item()} | val accuracy {val_acc}')

Epoch: 0 | train loss: 1.7881513833999634 | train accuracy: 0.15833333134651184 | val loss: 1.793402075767517 | val accuracy 0.1679999977350235
Epoch: 1 | train loss: 1.6587295532226562 | train accuracy: 0.9583333134651184 | val loss: 1.75005042552948 | val accuracy 0.44600000977516174
Epoch: 2 | train loss: 1.5306812524795532 | train accuracy: 0.9916666746139526 | val loss: 1.7071179151535034 | val accuracy 0.6060000061988831
Epoch: 3 | train loss: 1.401977777481079 | train accuracy: 0.9916666746139526 | val loss: 1.6637402772903442 | val accuracy 0.6579999923706055
Epoch: 4 | train loss: 1.2729523181915283 | train accuracy: 0.9916666746139526 | val loss: 1.619776725769043 | val accuracy 0.6579999923706055
Epoch: 5 | train loss: 1.1448097229003906 | train accuracy: 0.9916666746139526 | val loss: 1.575353741645813 | val accuracy 0.6600000262260437
Epoch: 6 | train loss: 1.0190941095352173 | train accuracy: 0.9916666746139526 | val loss: 1.5308476686477661 | val accuracy 0.6639999747276

In [75]:
from sklearn.manifold import TSNE

In [10]:
# x = untrained_gat(data.x, data.edge_index)

# # Train TSNE
# tsne = TSNE(n_components=2, learning_rate='auto', init='pca').fit_transform(h.detach())

# # Plot TSNE
# plt.figure(figsize=(10, 10))
# plt.axis('off')
# plt.scatter(tsne[:, 0], tsne[:, 1], s=50, c=data.y)
# plt.show()

In [84]:
X.shape, A.shape

(torch.Size([1, 3327, 3703]), torch.Size([1, 3327, 3327]))

In [85]:
out.shape

torch.Size([1, 3327, 400])

In [164]:
class GANetwork(nn.Module):
    def __init__(self, 
                 in_features, 
                 out_features,
                 n_classes,
                 num_heads=4,
                 alpha=0.2,
                 bias=True):
        super().__init__()
        self.n_classes = n_classes
        self.num_heads = num_heads
        self.alpha = alpha

        self.W1 = nn.Linear(in_features, out_features * num_heads, bias=bias)
        self.a1 = nn.Parameter(torch.Tensor(num_heads, 2 * out_features))

        self.W2 = nn.Linear(out_features, out_features * num_heads)
        self.a2 = nn.Parameter(torch.Tensor(num_heads, 2 * out_features))

        self.W3 = nn.Linear(out_features * num_heads, out_features)
        self.FL = nn.Linear(out_features * num_heads, n_classes)

        nn.init.xavier_uniform_(self.a1.data, gain=1.414)
        nn.init.xavier_uniform_(self.a2.data, gain=1.414)
    
    def attention_block(self, h, A, W, a):
        # save batch size & number of features
        batch_size, num_nodes = h.size(0), h.size(1)

        # project X to Matrix W
        Wh = W(h)

        # separate each head via separate dimension
        Wh = Wh.view(batch_size, num_nodes, self.num_heads, -1)

        # get edge matrix from A
        edges = A.nonzero(as_tuple=False)

        # remove batch dimension
        Wh_flat_flat = Wh.view(batch_size * num_nodes, self.num_heads, -1)

        # select indices of connected nodes
        edge_indices_row = edges[:, 0] * num_nodes + edges[:, 1]
        edge_indices_col = edges[:, 0] * num_nodes + edges[:, 2]

        # select corresponding projected vectors from Wh_flat_flat Matrix (for connected pairs)
        a_input = torch.cat(
            [
                torch.index_select(input=Wh_flat_flat, index=edge_indices_row, dim=0),
                torch.index_select(input=Wh_flat_flat, index=edge_indices_col, dim=0),
            ],
            dim=-1,
        )
        # calculate attention logits & aplly Leaky_relu activation function
        attn_logits = torch.einsum("bhc,hc->bh", a_input, a)
        attn_logits = F.leaky_relu(attn_logits)

        # replace 0's by '-inf' as features with 0 value should not have attention power (softmax will nullify it)
        attn_matrix = attn_logits.new_zeros(A.shape + (self.num_heads,)).fill_(float('-inf'))
        attn_matrix[A.view(1, A.size(1), A.size(2), 1).repeat(1, 1, 1, self.num_heads) == 1] = attn_logits.reshape(-1)

        # apply softmax to calculate attention scores
        attn_probs = F.softmax(attn_matrix, dim=2)

        # use torch.einsum to get updated embeddings with size (batch_size, num_nodes, out_features * num_heads)
        Wh = torch.einsum("bijh,bjhc->bihc", attn_probs, Wh)
        Wh = Wh.reshape(batch_size, num_nodes, -1)
        return Wh

    def forward(self, h, A):
        h = self.attention_block(h, A, self.W1, self.a1) 
        h = self.W3(h)
        h = self.attention_block(h, A, self.W2, self.a2)
        out = self.FL(h)
        return h, out

In [165]:
GAN = GANetwork(in_features=in_features, 
                out_features=out_features, 
                n_classes=n_classes, 
                num_heads=num_heads, 
                alpha=alpha, 
                bias=bias)

In [166]:
emb, logits = GAN(X, A)

In [168]:
emb.shape, logits.shape

(torch.Size([1, 3327, 400]), torch.Size([1, 3327, 6]))