In [1]:
import sklearn.metrics
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid, PPI
from torch_geometric.nn import GAT
from datetime import datetime

In [2]:
for dataset in ['citeseer', 'cora', 'pubmed']:
  start = datetime.now()
  dataset = Planetoid(root=f'../data/{dataset}', name=dataset)
  # Define model and optimizer
  model = GAT(
      in_channels=dataset.num_features,
      out_channels=dataset.num_classes,
      hidden_channels=8,
      num_layers=2,
      heads=8,
      dropout=0.6,
      act='elu',
      act_first=True
  )
  # {'PairNorm', 'GraphSizeNorm', 'HeteroLayerNorm', 'InstanceNorm', 'BatchNorm', 'DiffGroupNorm', 'GraphNorm', 'HeteroBatchNorm', 'MessageNorm', 'MeanSubtractionNorm', 'LayerNorm'}
  optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

  # Train model
  for epoch in range(200):
      model.train()
      optimizer.zero_grad()
      out = model(dataset.x, dataset.edge_index)
      #loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss = F.cross_entropy(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss.backward()
      optimizer.step()

      # Evaluate model
      model.eval()
      pred = model(dataset.x, dataset.edge_index).argmax(dim=1)
      correct = int(pred[dataset.train_mask].eq(dataset.y[dataset.train_mask]).sum().item())
      acc = correct / int(dataset.train_mask.sum())
      # print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


  # Test the model
  model.eval()
  out = model(dataset.x, dataset.edge_index)
  pred = out.argmax(dim=1)
  acc = pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item() / int(dataset.test_mask.sum())
  print('\n\n*****************************************************************************************************\n')
  print(f'                                         {dataset} ')
  print(f'                                         Total Epochs: 200')
  print(f'                                         Test Accuracy: {acc:.4f}')
  print(f'                                         Time Taken: {datetime.now() - start}')
  print('\n*****************************************************************************************************\n\n')



*****************************************************************************************************

                                         citeseer() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.6730
                                         Time Taken: 0:00:17.812977

*****************************************************************************************************




*****************************************************************************************************

                                         cora() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.8050
                                         Time Taken: 0:00:11.101380

*****************************************************************************************************




************************************************************************************************

In [3]:

ppi_train = PPI('../data/ppi/')


Downloading https://data.dgl.ai/dataset/ppi.zip
Extracting ../data/ppi/ppi.zip
Processing...
Done!


In [4]:
model = GAT(
    in_channels=ppi_train.num_features,
    out_channels=ppi_train.num_classes,
    hidden_channels=256,
    num_layers=3,
    heads=4,
    dropout=0.6,
    act='elu',
    act_first=True
)


In [5]:

optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)


In [None]:
start = datetime.now()
# Train model
for epoch in range(2):
  print(epoch)
  model.train()
  optimizer.zero_grad()
  out = model(ppi_train.x, ppi_train.edge_index)
  loss = F.cross_entropy(out, ppi_train.y)
  loss.backward()
  optimizer.step()

  # Evaluate model
  model.eval()
  pred = model(ppi_train.x, ppi_train.edge_index) > .5
  f1 = sklearn.metrics.f1_score(ppi_train.y.detach().numpy(), pred.detach().numpy(), average='micro')
  #print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, F1: {f1}')


# Test the model
ppi_test = PPI('../data/ppi/', 'test')
model.eval()
out = model(ppi_test.x, ppi_test.edge_index) > .5
f1 = sklearn.metrics.f1_score(ppi_test.y.detach().numpy(), out.detach().numpy(), average='micro')
print('\n\n*****************************************************************************************************\n')
print(f'                                         PPI Dataset ')
print(f'                                         Total Epochs: 200')
print(f'                                         F1 Score: {f1:.4f}')
print(f'                                         Time Taken: {datetime.now() - start}')
print('\n*****************************************************************************************************\n\n')

Alright, now to implement our own version of GAT to more closely follow the paper. Specifically, this is an attempt at reproducing the model they used for the Cora and Citeseer datasets. The Pubmed model is slightly different, and the PPI model is significantly different.

It is unclear from the description in the paper whether GATConv applies dropout in the same way as the paper describes. If we have time, we'll have to revisit the original GAT implementation and compare their dropout methodology with that implemented by GATConv. If they are not the same and if there is time, we will have to go one level lower, either implementing our own GATConv (perhaps forking the one in pytorch geometric?), or implementing our own GAT from scratch.

Furthermore, the paper talks about applying L2 regularization, which we are not doing, and which GATConv does not appear to provide an option for. One step at a time though...

In [7]:
import torch
import torch_geometric

class GATCora(torch.nn.Module):
  def __init__(self, in_channels, n_classes):
    super().__init__()
    self.conv1 = torch_geometric.nn.GATConv(heads=8, out_channels=8, in_channels=in_channels, dropout=.6)
    self.act1 = torch.nn.ELU()
    self.conv2 = torch_geometric.nn.GATConv(heads=1, out_channels=n_classes, in_channels=64, dopout=.6)
    self.act2 = torch.nn.Softmax(dim=1)

  def forward(self, x, edge_index):
    x = self.act1(self.conv1(x, edge_index))
    x = self.act2(self.conv2(x, edge_index))
    return x

In [8]:
for dataset in ['citeseer', 'cora']:
  start = datetime.now()
  dataset = Planetoid(root=f'../data/{dataset}', name=dataset)
  # Define model and optimizer
  model = GATCora(dataset.num_features, dataset.num_classes)
  optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

  # Train model
  for epoch in range(200):
      model.train()
      optimizer.zero_grad()
      out = model(dataset.x, dataset.edge_index)
      #loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss = F.cross_entropy(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss.backward()
      optimizer.step()

      # Evaluate model
      model.eval()
      pred = model(dataset.x, dataset.edge_index).argmax(dim=1)
      correct = int(pred[dataset.train_mask].eq(dataset.y[dataset.train_mask]).sum().item())
      acc = correct / int(dataset.train_mask.sum())
#       print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


  # Test the model
  model.eval()
  out = model(dataset.x, dataset.edge_index)
  pred = out.argmax(dim=1)
  acc = pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item() / int(dataset.test_mask.sum())
  print('\n\n*****************************************************************************************************\n')
  print(f'                                         {dataset} ')
  print(f'                                         Total Epochs: 200')
  print(f'                                         Test Accuracy: {acc:.4f}')
  print(f'                                         Time Taken: {datetime.now() - start}')
  print('\n*****************************************************************************************************\n\n')



*****************************************************************************************************

                                         citeseer() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.6720
                                         Time Taken: 0:00:42.444737

*****************************************************************************************************




*****************************************************************************************************

                                         cora() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.7880
                                         Time Taken: 0:00:20.017430

*****************************************************************************************************




In [None]:
class GATPubmed(torch.nn.Module):
  def __init__(self, in_channels, n_classes):
    super().__init__()
    self.conv1 = torch_geometric.nn.GATConv(heads=8, out_channels=8, in_channels=in_channels, dropout=.6)
    self.act1 = torch.nn.ELU()
    self.conv2 = torch_geometric.nn.GATConv(heads=8, out_channels=n_classes, in_channels=64, dopout=.6, concat=False)
    self.act2 = torch.nn.Softmax(dim=1)

  def forward(self, x, edge_index):
    x = self.act1(self.conv1(x, edge_index))
    x = self.act2(self.conv2(x, edge_index))
    return x

In [None]:
start = datetime.now()
dataset = Planetoid(root=f'../data/pubmed', name='pubmed')
# Define model and optimizer
model = GATPubmed(dataset.num_features, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# Train model
for epoch in range(200):
  model.train()
  optimizer.zero_grad()
  out = model(dataset.x, dataset.edge_index)
  #loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
  loss = F.cross_entropy(out[dataset.train_mask], dataset.y[dataset.train_mask])
  loss.backward()
  optimizer.step()

  # Evaluate model
  model.eval()
  pred = model(dataset.x, dataset.edge_index).argmax(dim=1)
  correct = int(pred[dataset.train_mask].eq(dataset.y[dataset.train_mask]).sum().item())
  acc = correct / int(dataset.train_mask.sum())
#       print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


# Test the model
model.eval()
out = model(dataset.x, dataset.edge_index)
pred = out.argmax(dim=1)
acc = pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item() / int(dataset.test_mask.sum())
print('\n\n*****************************************************************************************************\n')
print(f'                                         Pubmed ')
print(f'                                         Total Epochs: 200')
print(f'                                         Test Accuracy: {acc:.4f}')
print(f'                                         Time Taken: {datetime.now() - start}')
print('\n*****************************************************************************************************\n\n')

Alright, now to implement our own version of GAT to more closely follow the paper. Specifically, this is an attempt at reproducing the model they used for the Cora and Citeseer datasets. The Pubmed model is slightly different, and the PPI model is significantly different.

It is unclear from the description in the paper whether GATConv applies dropout in the same way as the paper describes. If we have time, we'll have to revisit the original GAT implementation and compare their dropout methodology with that implemented by GATConv. If they are not the same and if there is time, we will have to go one level lower, either implementing our own GATConv (perhaps forking the one in pytorch geometric?), or implementing our own GAT from scratch.

Furthermore, the paper talks about applying L2 regularization, which we are not doing, and which GATConv does not appear to provide an option for. One step at a time though...

In [3]:
import sklearn.metrics
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from datetime import datetime

class GATCora(torch.nn.Module):
  def __init__(self, in_channels, n_classes):
    super().__init__()
    self.conv1 = torch_geometric.nn.GATConv(heads=8, out_channels=8, in_channels=in_channels, dropout=.6)
    self.act1 = torch.nn.ELU()
    self.conv2 = torch_geometric.nn.GATConv(heads=1, out_channels=n_classes, in_channels=64, dopout=.6)
    self.act2 = torch.nn.Softmax(dim=1)

  def forward(self, x, edge_index):
    x = self.act1(self.conv1(x, edge_index))
    x = self.act2(self.conv2(x, edge_index))
    return x

In [4]:
for dataset in ['citeseer', 'cora']:
  start = datetime.now()
  dataset = Planetoid(root=f'../data/{dataset}', name=dataset)
  # Define model and optimizer
  model = GATCora(dataset.num_features, dataset.num_classes)
  optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

  # Train model
  for epoch in range(200):
      model.train()
      optimizer.zero_grad()
      out = model(dataset.x, dataset.edge_index)
      #loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss = F.cross_entropy(out[dataset.train_mask], dataset.y[dataset.train_mask])
      loss.backward()
      optimizer.step()

      # Evaluate model
      model.eval()
      pred = model(dataset.x, dataset.edge_index).argmax(dim=1)
      correct = int(pred[dataset.train_mask].eq(dataset.y[dataset.train_mask]).sum().item())
      acc = correct / int(dataset.train_mask.sum())
#       print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


  # Test the model
  model.eval()
  out = model(dataset.x, dataset.edge_index)
  pred = out.argmax(dim=1)
  acc = pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item() / int(dataset.test_mask.sum())
  print('\n\n*****************************************************************************************************\n')
  print(f'                                         {dataset} ')
  print(f'                                         Total Epochs: 200')
  print(f'                                         Test Accuracy: {acc:.4f}')
  print(f'                                         Time Taken: {datetime.now() - start}')
  print('\n*****************************************************************************************************\n\n')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!




*****************************************************************************************************

                                         citeseer() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.6900
                                         Time Taken: 0:00:40.600561

*****************************************************************************************************




Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!




*****************************************************************************************************

                                         cora() 
                                         Total Epochs: 200
                                         Test Accuracy: 0.7860
                                         Time Taken: 0:00:21.043799

*****************************************************************************************************




In [5]:
class GATPubmed(torch.nn.Module):
  def __init__(self, in_channels, n_classes):
    super().__init__()
    self.conv1 = torch_geometric.nn.GATConv(heads=8, out_channels=8, in_channels=in_channels, dropout=.6)
    self.act1 = torch.nn.ELU()
    self.conv2 = torch_geometric.nn.GATConv(heads=8, out_channels=n_classes, in_channels=64, dopout=.6, concat=False)
    self.act2 = torch.nn.Softmax(dim=1)

  def forward(self, x, edge_index):
    x = self.act1(self.conv1(x, edge_index))
    x = self.act2(self.conv2(x, edge_index))
    return x

In [6]:
start = datetime.now()
dataset = Planetoid(root=f'../data/pubmed', name='pubmed')
# Define model and optimizer
model = GATPubmed(dataset.num_features, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# Train model
for epoch in range(200):
  model.train()
  optimizer.zero_grad()
  out = model(dataset.x, dataset.edge_index)
  #loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
  loss = F.cross_entropy(out[dataset.train_mask], dataset.y[dataset.train_mask])
  loss.backward()
  optimizer.step()

  # Evaluate model
  model.eval()
  pred = model(dataset.x, dataset.edge_index).argmax(dim=1)
  correct = int(pred[dataset.train_mask].eq(dataset.y[dataset.train_mask]).sum().item())
  acc = correct / int(dataset.train_mask.sum())
#       print(f'Epoch {epoch + 1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')


# Test the model
model.eval()
out = model(dataset.x, dataset.edge_index)
pred = out.argmax(dim=1)
acc = pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item() / int(dataset.test_mask.sum())
print('\n\n*****************************************************************************************************\n')
print(f'                                         Pubmed ')
print(f'                                         Total Epochs: 200')
print(f'                                         Test Accuracy: {acc:.4f}')
print(f'                                         Time Taken: {datetime.now() - start}')
print('\n*****************************************************************************************************\n\n')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!




*****************************************************************************************************

                                         Pubmed 
                                         Total Epochs: 200
                                         Test Accuracy: 0.7830
                                         Time Taken: 0:01:45.892788

*****************************************************************************************************


