In [3]:
import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import os.path as osp
import numpy as np
import pandas as pd
import os
import glob
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

# Machine epsilon for Python floats.
eps = np.finfo(float).eps

# Default figure size (width, height) in inches.
# NOTE(review): this is set *before* `%matplotlib inline`; some IPython versions
# reset rcParams when the inline backend is activated — confirm the size sticks.
plt.rcParams['figure.figsize'] = 10, 10
%matplotlib inline

# Re-import edited local modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
# Adapted from the PyTorch Geometric example script `enzymes_topk_pool.py`.

In [3]:
# dataset
# Seed torch's global RNG before shuffling so that the train/test split (and the
# accuracies printed below) are reproducible under Restart & Run All.
# NOTE(review): assumes TUDataset.shuffle() draws from torch's global RNG — confirm.
torch.manual_seed(12345)

path = '../data/ENZYMES/'
dataset = TUDataset(path, name='ENZYMES')
dataset = dataset.shuffle()
# Size of the held-out split: one tenth of the dataset.
n = len(dataset) // 10

In [4]:
# The first n shuffled graphs are held out for testing; the rest are used for training.
test_dataset, train_dataset = dataset[:n], dataset[n:]
# Both loaders serve mini-batches of 60 graphs.
test_loader, train_loader = (
    DataLoader(split, batch_size=60) for split in (test_dataset, train_dataset)
)

In [4]:
# Pull a single mini-batch from the training loader for interactive inspection.
data = next(iter(train_loader))
x = data.x
edge_index = data.edge_index
batch = data.batch
y = data.y

* Why are max pooling and mean pooling used together?
* Why the skip layers (the per-stage readouts that are summed)?
* TODO: look both of these up.

In [13]:
class Net(torch.nn.Module):
    """Graph classifier built from three GraphConv + TopKPooling stages.

    After every stage the surviving node features are summarised by
    concatenating a global max pool and a global mean pool over ``batch``.
    The three summaries are summed and fed to a three-layer MLP that ends in
    ``log_softmax``.

    Args:
        num_features: Input node feature size. Defaults to
            ``dataset.num_features`` of the notebook-level ``dataset`` so that
            the original ``Net()`` call keeps working.
        num_classes: Number of output classes. Defaults to
            ``dataset.num_classes`` of the notebook-level ``dataset``.
        hidden: Width of the GraphConv layers (original value: 128).
        ratio: ``ratio`` passed to every TopKPooling layer (original: 0.8).
        dropout: Dropout probability applied after the first linear layer
            (original: 0.5).
    """

    def __init__(self, num_features=None, num_classes=None, hidden=128,
                 ratio=0.8, dropout=0.5):
        super(Net, self).__init__()

        # Fall back to the notebook global only when the caller did not say
        # otherwise; this removes the hidden dependency for explicit callers.
        if num_features is None:
            num_features = dataset.num_features
        if num_classes is None:
            num_classes = dataset.num_classes
        self.dropout = dropout

        # Layers are created in the same order as before so that parameter
        # initialisation consumes the RNG identically.
        self.conv1 = GraphConv(num_features, hidden)
        self.pool1 = TopKPooling(hidden, ratio=ratio)
        self.conv2 = GraphConv(hidden, hidden)
        self.pool2 = TopKPooling(hidden, ratio=ratio)
        self.conv3 = GraphConv(hidden, hidden)
        self.pool3 = TopKPooling(hidden, ratio=ratio)

        # The readout concatenates max- and mean-pooled features: 2 * hidden.
        self.lin1 = torch.nn.Linear(2 * hidden, hidden)
        self.lin2 = torch.nn.Linear(hidden, hidden // 2)
        self.lin3 = torch.nn.Linear(hidden // 2, num_classes)

    def _stage(self, conv, pool, x, edge_index, batch_ind):
        """Run one conv -> ReLU -> top-k pool stage and compute its readout."""
        x = F.relu(conv(x, edge_index))
        x, edge_index, _, batch_ind, _, _ = pool(x, edge_index, None, batch_ind)
        readout = torch.cat([gmp(x, batch_ind), gap(x, batch_ind)], dim=1)
        return x, edge_index, batch_ind, readout

    def forward(self, data):
        """Return log-probabilities computed from ``data.x``, ``data.edge_index`` and ``data.batch``."""
        x, edge_index, batch_ind = data.x, data.edge_index, data.batch

        x, edge_index, batch_ind, x1 = self._stage(
            self.conv1, self.pool1, x, edge_index, batch_ind)
        x, edge_index, batch_ind, x2 = self._stage(
            self.conv2, self.pool2, x, edge_index, batch_ind)
        x, edge_index, batch_ind, x3 = self._stage(
            self.conv3, self.pool3, x, edge_index, batch_ind)

        # Sum the readouts of all three stages before classification.
        x = x1 + x2 + x3

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x


In [14]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network on the chosen device and optimise it with Adam.
model = Net()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=5e-4)


In [15]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Uses the notebook-level ``model``, ``optimizer``, ``device`` and
    ``train_loader``.

    Args:
        epoch: Current epoch number. Unused; kept so existing calls still work.

    Returns:
        The NLL loss averaged over every graph served by ``train_loader``.
    """
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, data.y)
        loss.backward()
        # nll_loss is a per-batch mean, so weight it by the batch's graph count.
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    # Normalise by the dataset the loader actually iterates (as test() does),
    # not by a separate global that can drift out of sync between cells.
    return loss_all / len(train_loader.dataset)


def test(loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    Uses the notebook-level ``model`` and ``device``.

    Args:
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``; must
            also expose ``.dataset`` so the total number of graphs is known.

    Returns:
        Fraction of graphs whose predicted class equals ``data.y``.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            pred = model(data).max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)

In [16]:
# Train for 200 epochs, reporting loss and train/test accuracy after each one.
for epoch in range(1, 201):
    loss = train(epoch)
    train_acc, test_acc = test(train_loader), test(test_loader)
    print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.format(
        epoch, loss, train_acc, test_acc))

Epoch: 001, Loss: 1.80057, Train Acc: 0.16852, Test Acc: 0.15000
Epoch: 002, Loss: 1.79263, Train Acc: 0.17037, Test Acc: 0.11667
Epoch: 003, Loss: 1.78201, Train Acc: 0.19074, Test Acc: 0.13333
Epoch: 004, Loss: 1.76660, Train Acc: 0.25370, Test Acc: 0.18333
Epoch: 005, Loss: 1.75460, Train Acc: 0.23148, Test Acc: 0.18333
Epoch: 006, Loss: 1.74739, Train Acc: 0.21111, Test Acc: 0.10000
Epoch: 007, Loss: 1.74780, Train Acc: 0.25556, Test Acc: 0.25000
Epoch: 008, Loss: 1.71536, Train Acc: 0.27778, Test Acc: 0.20000
Epoch: 009, Loss: 1.69471, Train Acc: 0.27778, Test Acc: 0.16667
Epoch: 010, Loss: 1.69440, Train Acc: 0.31481, Test Acc: 0.18333
Epoch: 011, Loss: 1.68888, Train Acc: 0.31296, Test Acc: 0.20000
Epoch: 012, Loss: 1.69348, Train Acc: 0.33889, Test Acc: 0.20000
Epoch: 013, Loss: 1.67008, Train Acc: 0.32963, Test Acc: 0.18333
Epoch: 014, Loss: 1.65592, Train Acc: 0.35370, Test Acc: 0.20000
Epoch: 015, Loss: 1.67291, Train Acc: 0.34630, Test Acc: 0.20000
Epoch: 016, Loss: 1.65758

Epoch: 128, Loss: 0.79801, Train Acc: 0.82778, Test Acc: 0.35000
Epoch: 129, Loss: 0.74684, Train Acc: 0.80741, Test Acc: 0.38333
Epoch: 130, Loss: 0.77411, Train Acc: 0.80370, Test Acc: 0.41667
Epoch: 131, Loss: 0.75056, Train Acc: 0.78889, Test Acc: 0.40000
Epoch: 132, Loss: 0.74585, Train Acc: 0.81481, Test Acc: 0.40000
Epoch: 133, Loss: 0.71124, Train Acc: 0.81296, Test Acc: 0.35000
Epoch: 134, Loss: 0.70010, Train Acc: 0.83889, Test Acc: 0.36667
Epoch: 135, Loss: 0.73364, Train Acc: 0.80370, Test Acc: 0.40000
Epoch: 136, Loss: 0.71461, Train Acc: 0.82222, Test Acc: 0.40000
Epoch: 137, Loss: 0.72528, Train Acc: 0.84074, Test Acc: 0.38333
Epoch: 138, Loss: 0.71188, Train Acc: 0.81111, Test Acc: 0.36667
Epoch: 139, Loss: 0.80195, Train Acc: 0.75370, Test Acc: 0.38333
Epoch: 140, Loss: 0.77436, Train Acc: 0.77593, Test Acc: 0.36667
Epoch: 141, Loss: 0.74853, Train Acc: 0.81852, Test Acc: 0.33333
Epoch: 142, Loss: 0.68656, Train Acc: 0.81111, Test Acc: 0.36667
Epoch: 143, Loss: 0.65417

#### Random walks / node2vec on ENZYMES

In [5]:
# build randomwalks per graph per class
from torch_cluster import random_walk

# Walk hyper-parameters.
# NOTE(review): p and q are presumably the node2vec return / in-out parameters —
# confirm against the torch_cluster documentation.
p, q = 1, 1
walk_length = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Reload and reshuffle ENZYMES, then hold out one tenth of it for testing.
path = '../data/ENZYMES/'
dataset = TUDataset(path, name='ENZYMES')  # fixed: a stray trailing `n` made this a SyntaxError
dataset = dataset.shuffle()
n = len(dataset) // 10

test_dataset = dataset[:n]
train_dataset = dataset[n:]
# batch_size=1 so every batch holds exactly one graph.
test_loader = DataLoader(test_dataset, batch_size=1)
train_loader = DataLoader(train_dataset, batch_size=1)

In [None]:
# Start one random walk from every node of every graph, and label each walk
# with the class (graph.y) of the graph it came from.
walk_data = []
walk_y = []
for graph in dataset:
    x, edge_index, y = graph.x, graph.edge_index, graph.y
    subset = torch.arange(x.size(0), device=edge_index.device)
    # The node count used to be passed positionally, which lands in the
    # `coalesced` slot of random_walk rather than `num_nodes`. Pass both by
    # keyword: coalesced=True keeps the truthy value the old call supplied,
    # and num_nodes now really receives the node count.
    walks = random_walk(edge_index[0], edge_index[1], subset, walk_length, p, q,
                        coalesced=True, num_nodes=x.size(0))
    walk_data.append(walks)
    # One label row per walk, carrying the graph's class value.
    walk_y.append(torch.ones(walks.size(0), 1) * y)


walk_data = torch.cat(walk_data, 0)
walk_y = torch.cat(walk_y, 0)

In [17]:
# Shape of the concatenated walk labels (one row per walk).
walk_y.shape

torch.Size([20, 1])

In [48]:
# Total number of graphs in the dataset (train and test splits combined).
len(dataset)

600

In [52]:
# Show the first graph only, to see which fields a single example carries.
for graph in dataset[:1]:
    print(graph)

Data(edge_index=[2, 120], x=[32, 3], y=[1])


In [25]:
# Fetch the first batch from the training loader.
data = next(iter(train_loader))


In [26]:

# Walk through every training batch and show its node-feature shape followed
# by its batch-assignment vector.
for data in train_loader:
    x = data.x
    edge_index = data.edge_index
    batch = data.batch
    y = data.y

    print(x.size(), data.batch, sep='\n')

torch.Size([26, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0])
torch.Size([18, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([23, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([46, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([46, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([18, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([16, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([15, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([27, 3])
tenso

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0])
torch.Size([42, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([32, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([27, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0])
torch.Size([11, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([40, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([20, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([3, 3])
tensor([0, 0, 0])
torch.Size([24, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([29, 3])
tensor([0, 0,

        0, 0, 0, 0])
torch.Size([14, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([28, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])
torch.Size([28, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])
torch.Size([25, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0])
torch.Size([26, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0])
torch.Size([42, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([24, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([42, 3])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([