PROTEINS is a dataset of protein graphs, each classified as an enzyme or a non-enzyme.
Nodes represent amino acids, and two nodes are connected by an edge if they are less than 6 Angstroms apart.

In [11]:

import torch
from torch_geometric.datasets import TUDataset

# Load the PROTEINS collection through PyG's TUDataset wrapper; `root` is the
# directory handed to TUDataset for this dataset's files.
dataset = TUDataset(
    name='PROTEINS',
    root='data/TUDataset',
)

In [12]:
# Take the first graph as a representative sample for the per-graph statistics.
data = dataset[0]

print()
print(f'Dataset: {dataset}:')
print('====================')

# --- Dataset-level statistics ---
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')

# --- Statistics of the first graph only ---
print(f'Number of edges: {data.num_edges}')
# Ratio of edge count to node count for this graph.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# has_isolated_nodes() / has_self_loops() are the PyG >= 2.0 names; the older
# contains_isolated_nodes() / contains_self_loops() spellings are deprecated.
print(f'Contains isolated nodes: {data.has_isolated_nodes()}')
print(f'Contains self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
print(data)


Dataset: PROTEINS(1113):
Number of graphs: 1113
Number of features: 3
Number of edges: 162
Average node degree: 3.86
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True
Data(edge_index=[2, 162], x=[42, 3], y=[1])


In [13]:
# Fix torch's RNG seed before shuffling (presumably so the permutation, and
# therefore the split below, is the same on every fresh run).
torch.manual_seed(12345)
dataset = dataset.shuffle()

# Hold out the first 110 shuffled graphs for testing; train on the remainder.
test_dataset = dataset[:110]
train_dataset = dataset[110:]


In [14]:
# PyG >= 2.0 exposes DataLoader from torch_geometric.loader; importing it from
# torch_geometric.data is deprecated.
from torch_geometric.loader import DataLoader

# Batches of 64 graphs. Only the training loader reshuffles between epochs;
# the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [15]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
 

class GCN(torch.nn.Module):
    """Graph classifier: three GCNConv layers, mean pooling, linear head.

    Args:
        hidden_channels: Output width of each of the three GCNConv layers.
        in_channels: Size of the input node features. When omitted, falls
            back to ``dataset.num_node_features`` of the notebook-level
            ``dataset`` (the original behaviour).
        num_classes: Number of outputs of the final linear layer. When
            omitted, falls back to ``dataset.num_classes`` of the
            notebook-level ``dataset`` (the original behaviour).
    """

    def __init__(self, hidden_channels, in_channels=None, num_classes=None):
        super().__init__()
        # Only touch the notebook-global `dataset` when the caller did not
        # supply explicit sizes, so the model can also be built standalone.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Compute the classifier output for a batch of graphs.

        Args:
            x: Node features, forwarded to the first GCNConv layer.
            edge_index: Graph connectivity, forwarded to every GCNConv layer.
            batch: Forwarded to ``global_mean_pool`` (presumably the
                node-to-graph assignment vector - confirm against the loader).

        Returns:
            The output of the final linear layer applied to the pooled
            (and, in training mode, dropped-out) embeddings.
        """
        # 1. Node embeddings: ReLU after the first two convolutions only.
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout: pool node embeddings with global_mean_pool.
        x = global_mean_pool(x, batch)

        # 3. Classifier head; dropout is tied to self.training.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x
    
# Build the network with 64 hidden channels and print its layer summary.
model = GCN(64)
print(model)

GCN(
  (conv1): GCNConv(3, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [16]:
# Cross-entropy loss for the training objective.
criterion = torch.nn.CrossEntropyLoss()

# Adam over every parameter of the model, learning rate 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
 
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. Puts the model in training mode and performs one
    optimizer step per batch. Returns nothing.
    """
    model.train()

    for data in train_loader:
        # Clear gradients *before* the backward pass so that anything left on
        # the parameters (e.g. from an interrupted cell run) cannot leak into
        # this step.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

def test(loader):
  model.eval()
 
  correct = 0
  for data in loader:  
      out = model(data.x, data.edge_index, data.batch)  
      pred = out.argmax(dim=1)  
      correct += int((pred == data.y).sum())  
  return correct / len(loader.dataset)  
 

# Train for 200 epochs, reporting accuracy on both splits after each one.
for epoch in range(1, 201):
    train()
    # Evaluate on the training split first, then on the held-out split.
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.6351, Test Acc: 0.6091
Epoch: 002, Train Acc: 0.6790, Test Acc: 0.6636
Epoch: 003, Train Acc: 0.6481, Test Acc: 0.6818
Epoch: 004, Train Acc: 0.6590, Test Acc: 0.7000
Epoch: 005, Train Acc: 0.7129, Test Acc: 0.7273
Epoch: 006, Train Acc: 0.6879, Test Acc: 0.7273
Epoch: 007, Train Acc: 0.6740, Test Acc: 0.6636
Epoch: 008, Train Acc: 0.7089, Test Acc: 0.7273
Epoch: 009, Train Acc: 0.6889, Test Acc: 0.7000
Epoch: 010, Train Acc: 0.6710, Test Acc: 0.6909
Epoch: 011, Train Acc: 0.7168, Test Acc: 0.7273
Epoch: 012, Train Acc: 0.6780, Test Acc: 0.6818
Epoch: 013, Train Acc: 0.7149, Test Acc: 0.7091
Epoch: 014, Train Acc: 0.6909, Test Acc: 0.6909
Epoch: 015, Train Acc: 0.7119, Test Acc: 0.7000
Epoch: 016, Train Acc: 0.7129, Test Acc: 0.7000
Epoch: 017, Train Acc: 0.7238, Test Acc: 0.7364
Epoch: 018, Train Acc: 0.7238, Test Acc: 0.7273
Epoch: 019, Train Acc: 0.6849, Test Acc: 0.6727
Epoch: 020, Train Acc: 0.7228, Test Acc: 0.7273
Epoch: 021, Train Acc: 0.6939, Test Acc:

Epoch: 172, Train Acc: 0.7248, Test Acc: 0.7455
Epoch: 173, Train Acc: 0.7029, Test Acc: 0.6727
Epoch: 174, Train Acc: 0.7168, Test Acc: 0.7182
Epoch: 175, Train Acc: 0.7258, Test Acc: 0.7182
Epoch: 176, Train Acc: 0.7228, Test Acc: 0.7091
Epoch: 177, Train Acc: 0.7049, Test Acc: 0.7364
Epoch: 178, Train Acc: 0.7188, Test Acc: 0.7273
Epoch: 179, Train Acc: 0.7218, Test Acc: 0.7364
Epoch: 180, Train Acc: 0.7149, Test Acc: 0.6727
Epoch: 181, Train Acc: 0.7109, Test Acc: 0.6909
Epoch: 182, Train Acc: 0.7149, Test Acc: 0.7455
Epoch: 183, Train Acc: 0.7168, Test Acc: 0.7455
Epoch: 184, Train Acc: 0.7139, Test Acc: 0.7182
Epoch: 185, Train Acc: 0.7318, Test Acc: 0.7364
Epoch: 186, Train Acc: 0.7258, Test Acc: 0.7455
Epoch: 187, Train Acc: 0.7159, Test Acc: 0.7091
Epoch: 188, Train Acc: 0.7278, Test Acc: 0.7455
Epoch: 189, Train Acc: 0.7218, Test Acc: 0.7182
Epoch: 190, Train Acc: 0.7079, Test Acc: 0.7182
Epoch: 191, Train Acc: 0.7228, Test Acc: 0.7545
Epoch: 192, Train Acc: 0.7019, Test Acc: