<a href="https://colab.research.google.com/github/soumyadiptapete/GNN_practice/blob/main/vanilla_GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch-geometric
from torch_geometric.datasets import FacebookPagePage
dataset=FacebookPagePage(root='.')
data=dataset[0]
print(f'is directed: {data.is_directed()}')
print(f'number of nodes {data.x.shape[0]}')
print(f'data has isolated nodes {data.has_isolated_nodes()}')
print(f'data has loop: {data.has_self_loops()}')

is directed: False
number of nodes 22470
data has isolated nodes False
data has loop: True


In [2]:
# make dataset as table with dims Nxd
import pandas as pd
df_x=pd.DataFrame(data.x.numpy())
df_x['label']=pd.DataFrame(data.y)
df_x.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,label
0,-0.262576,-0.276483,-0.26235,-0.299327,-0.299159,-0.270681,-0.307757,-0.269733,-0.25101,-0.308343,...,-0.273229,-0.2237,-0.284379,-0.224216,-0.209509,-0.255755,-0.21514,-0.375903,-0.223836,0
1,-0.262576,-0.276483,-0.26235,-0.299327,-0.299159,-0.270681,-0.307757,-0.269733,-0.25101,-0.308343,...,-0.234818,-0.2237,-0.284379,-0.197935,-0.147256,-0.255755,-0.21514,-0.364134,-0.128634,2
2,-0.262576,-0.265053,-0.26235,-0.299327,-0.299159,-0.270681,-0.307757,-0.210461,-0.25101,3.222161,...,-0.273229,-0.2237,-0.284379,-0.224216,-0.209509,-0.255755,-0.21514,-0.375903,-0.223836,1
3,-0.246378,-0.276483,-0.241991,-0.299327,-0.299159,-0.270681,-0.307051,-0.269733,-0.25101,-0.308343,...,-0.273229,-0.2237,-0.265534,-0.080353,-0.209509,-0.25056,-0.18026,-0.375903,-0.223836,2
4,-0.262576,-0.276483,-0.26235,-0.299327,-0.299159,-0.270681,-0.307757,-0.269733,-0.25101,-0.308343,...,-0.273229,-0.175312,-0.272613,-0.224216,-0.181153,-0.255755,-0.21514,-0.370639,-0.223836,3


In [3]:
data.train_mask=range(18000)
data.val_mask=range(18001,20000)
data.test_mask=range(20001,22470)


In [4]:
data.y.shape

torch.Size([22470])

In [5]:
def accuracy(y_pred, y_true):
    """Calculate accuracy."""
    return torch.sum(y_pred == y_true) / len(y_true)

In [13]:
from torch_geometric.utils import to_dense_adj
A_tilda= to_dense_adj(data.edge_index)[0]
A_tilda+=torch.eye(A_tilda.shape[0])
print(A_tilda.shape)
A_tilda

torch.Size([22470, 22470])


tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])

In [14]:
import numpy as np
import torch
from torch.nn import Linear
import torch.nn.functional as F
#create GNN layer
class GNN_layer_simple(torch.nn.Module):
  # out=(A+I)XW
  def __init__(self,dim_in, dim_out):
    super().__init__()
    self.Linear1=Linear(dim_in,dim_out,bias=False)

  def forward(self,x,A_tilda):
    x=self.Linear1(x) #N*h
    x=torch.sparse.mm(A_tilda, x)
    return x



In [15]:
#create vanilla GNN
class GNN(torch.nn.Module):
  def __init__(self, dim_in,dim_h,dim_out):
    super().__init__()
    self.GNN1=GNN_layer_simple(dim_in,dim_h)
    self.GNN2=GNN_layer_simple(dim_h,dim_out)

  def forward(self,x):
    x=self.GNN1(x,A_tilda)#N*dim_h
    x=F.relu(x)
    x=self.GNN2(x,A_tilda)#N*dim_out
    return F.log_softmax(x,dim=1)


  def fit(self,data,epochs):
    criterion=torch.nn.CrossEntropyLoss() #input is N*c, c is num classes, target is N
    optimizer=torch.optim.Adam(self.parameters(), lr=0.01, weight_decay=5e-4)

    y=data.y#labels


    for epoch in range(epochs+1):
      out=self(data.x)#output of network forward pass
      optimizer.zero_grad()
      loss=criterion(out[data.train_mask],y[data.train_mask])
      # print train loss and validation loss
      if epoch%20==0:
        val_loss=criterion(out[data.val_mask],y[data.val_mask])

        train_acc= accuracy(torch.argmax(out[data.train_mask],dim=1), y[data.train_mask])
        val_acc= accuracy(torch.argmax(out[data.val_mask],dim=1), y[data.val_mask])
        print(f'Train Loss : {loss}')
        print(f'Validation loss: {val_loss}')

        print(f'Train acc: {train_acc}')
        print(f'Val acc:{val_acc}')

      loss.backward()
      optimizer.step()

  @torch.no_grad()
  def test(self,data):
    self.eval()
    out=self(data.x)
    y_pred= torch.argmax(out[data.test_mask],dim=1)
    y_actual=data.y[data.test_mask]
    accuracy=torch.sum(y_pred==y_actual)/len(y_pred)
    print(f'Accuracy : {accuracy}')


In [17]:
gnn_model=GNN(dataset.num_features, 16, dataset.num_classes)
print(gnn_model)

GNN(
  (GNN1): GNN_layer_simple(
    (Linear1): Linear(in_features=128, out_features=16, bias=False)
  )
  (GNN2): GNN_layer_simple(
    (Linear1): Linear(in_features=16, out_features=4, bias=False)
  )
)


In [18]:
out_sample=gnn_model(data.x)

In [19]:
out_sample.shape

torch.Size([22470, 4])

In [20]:
gnn_model.fit(data,epochs=100)

Train Loss : 34.784034729003906
Validation loss: 29.348726272583008
Train acc: 0.4033333361148834
Val acc:0.4082041084766388
Train Loss : 3.8205816745758057
Validation loss: 2.3993897438049316
Train acc: 0.8216666579246521
Val acc:0.8304151892662048
Train Loss : 1.276506781578064
Validation loss: 0.9820217490196228
Train acc: 0.8270000219345093
Val acc:0.8299149870872498
Train Loss : 0.6856522560119629
Validation loss: 0.6403438448905945
Train acc: 0.8379999995231628
Val acc:0.8379189372062683
Train Loss : 0.5255650281906128
Validation loss: 0.5156439542770386
Train acc: 0.8639444708824158
Val acc:0.8649324774742126
Train Loss : 0.473837286233902
Validation loss: 0.4653433561325073
Train acc: 0.875333309173584
Val acc:0.8774387240409851


In [20]:
gnn_model.test(data)

Accuracy : 0.44390439987182617
