<a href="https://colab.research.google.com/github/subbu-art/DatExtractor/blob/master/GNN_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

In [None]:
data = Planetoid(root  = '.', name = 'cora', transform = NormalizeFeatures())

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
data

cora()

In [None]:
data.num_features

1433

In [None]:
data.get_summary()

  std=data.std().item(),


Planetoid (#graphs=1):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     2708 |    10556 |
| std        |      nan |      nan |
| min        |     2708 |    10556 |
| quantile25 |     2708 |    10556 |
| median     |     2708 |    10556 |
| quantile75 |     2708 |    10556 |
| max        |     2708 |    10556 |
+------------+----------+----------+

In [None]:
data.num_classes

7

In [None]:
data.num_node_features

1433

In [None]:
data_ext = data[0]

In [None]:
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
# The features per node(per paper) is dervied from bag of word style representation.

In [None]:
data_ext.x[0][:30]

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.1111, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])

In [None]:
data_ext.y

tensor([3, 4, 4,  ..., 3, 3, 3])

In [None]:
data_ext.test_mask
data.num_classes

7

In [None]:
# Graph Neural Network

In [None]:
import torch.nn as nn
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv, GATConv

In [None]:
class GNNClassification(nn.Module):
  def __init__(self, channels):
    super(GNNClassification, self).__init__()
    torch.manual_seed(42)
    #intializing layers
    self.GNNLayer1 = GCNConv(data.num_features, channels)
    self.GNNLayer2 = GCNConv(channels, channels)
    self.GNNLayer3 = GCNConv(channels, channels)
    self.GNNLayer4 = GCNConv(channels, channels)
    #output layer
    self.GNNOut = Linear(channels, data.num_classes)

  def forward(self, x, edge_index):
    x = self.GNNLayer1(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    x = self.GNNLayer2(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    x = self.GNNLayer3(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    x = self.GNNLayer4(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    y = F.softmax(self.GNNOut(x), dim = 1)
    return y

model = GNNClassification(channels = 16)


In [None]:
# Train the model

In [None]:
learning_rate = 3e-4

optimizer = torch.optim.AdamW(model.parameters(), lr = learning_rate)



def train():
  model.train()
  optimizer.zero_grad()
  out = model(data_ext.x, data_ext.edge_index)

  loss = F.cross_entropy(out[data_ext.train_mask], data.y[data_ext.train_mask])

  loss.backward()

  optimizer.step()

  return loss

losses = []
for epoch in range(12000):
  loss = train()
  losses.append(loss)
  if epoch%200 == 0:
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 000, Loss: 1.9461
Epoch: 200, Loss: 1.9201
Epoch: 400, Loss: 1.7165
Epoch: 600, Loss: 1.5387
Epoch: 800, Loss: 1.3591
Epoch: 1000, Loss: 1.2964
Epoch: 1200, Loss: 1.3038
Epoch: 1400, Loss: 1.2772
Epoch: 1600, Loss: 1.2339
Epoch: 1800, Loss: 1.2213
Epoch: 2000, Loss: 1.1971
Epoch: 2200, Loss: 1.1988
Epoch: 2400, Loss: 1.1988
Epoch: 2600, Loss: 1.1922
Epoch: 2800, Loss: 1.1949
Epoch: 3000, Loss: 1.2062
Epoch: 3200, Loss: 1.1908
Epoch: 3400, Loss: 1.1921
Epoch: 3600, Loss: 1.1751
Epoch: 3800, Loss: 1.1842
Epoch: 4000, Loss: 1.1914
Epoch: 4200, Loss: 1.1835
Epoch: 4400, Loss: 1.1821
Epoch: 4600, Loss: 1.1835
Epoch: 4800, Loss: 1.1799
Epoch: 5000, Loss: 1.1791
Epoch: 5200, Loss: 1.1821
Epoch: 5400, Loss: 1.1862
Epoch: 5600, Loss: 1.1764


In [None]:
import seaborn as sns
losses_float = [float(loss.cpu().detach().numpy()) for loss in losses]
loss_indices = [i for i,l in enumerate(losses_float)]
plt = sns.lineplot(losses_float)
plt

NameError: name 'losses' is not defined

In [None]:
#Testing the model

In [None]:
def test():
  model.eval()
  out = model(data_ext.x, data_ext.edge_index)

  pred = out.argmax(dim=1)

  test_correct  = pred[data.test_mask] == data.y[data.test_mask]

  test_acc = int(test_correct.sum())/int(data.test_mask.sum())


  return test_acc

In [None]:
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.7540
