# "Hello World" GNN tutorial 
## (it is just to learn how it works, but actually the training performances are poor)

In [1]:
%%capture
!pip install spektral

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout

from spektral.datasets import TUDataset
from spektral.transforms import Degree, GCNFilter
from spektral.data import BatchLoader
from spektral.layers import GCNConv, GlobalSumPool

import plotly.graph_objects as go

In [3]:
dataset = TUDataset('PROTEINS')
dataset.filter(lambda g: g.n_nodes < 50) # less than 50 nodes

Successfully loaded PROTEINS.


In [4]:
#print(dataset[0].a) # print the adjacency list of the first graph of the dataset (it is a sparse matrix)

In [5]:
#print(dataset[0].a.sum(-1).max) # obtain a vector with the degree of each node, shape = (number_of_nodes, 1) = (graph.n_nodes, 1)

In [6]:
max_degree = dataset.map(lambda g: g.a.sum(-1).max(), reduce=max) # map reduce function, which produces the max degree of each graph and then takes the max among them
print(max_degree)

10.0


In [7]:
dataset.apply(Degree(int(max_degree))) # include the one hot encoded degree at each node of the dataset's graphs
dataset.apply(GCNFilter())

In [8]:
class GNN(Model):

    def __init__(self, n_hidden, n_labels):
        super().__init__()
        self.graph_conv = GCNConv(n_hidden)
        self.pool = GlobalSumPool()
        self.dropout = Dropout(0.5)
        self.dense = Dense(n_labels, 'softmax')

    def call(self, inputs):
        out = self.graph_conv(inputs)
        out = self.dropout(out)
        out = self.pool(out)
        out = self.dense(out)
        return out

In [9]:
model = GNN(32, dataset.n_labels) # this is actually a binary classification
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), 
              loss=tf.keras.losses.BinaryCrossentropy(), 
              metrics=[tf.keras.metrics.Accuracy()])

In [10]:
train_loader = BatchLoader(dataset[:int(.5*len(dataset))], batch_size=32)
val_loader = BatchLoader(dataset[int(.5*len(dataset)):int(.75*len(dataset))], batch_size=32)
test_loader = BatchLoader(dataset[int(.75*len(dataset)):], batch_size=32)

In [11]:
history = model.fit(train_loader.load(), 
                    steps_per_epoch=train_loader.steps_per_epoch, 
                    epochs=10,
                    validation_data=val_loader.load(),
                    validation_steps=val_loader.steps_per_epoch,
                    #callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)],
                    shuffle=False
                    )

  np.random.shuffle(a)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=np.arange(len(history.history['loss'])), 
                         y=history.history['loss'], 
                         name='Training loss',
                         line=dict(color='red', width=2, dash='dash')))
fig.add_trace(go.Scatter(x=np.arange(len(history.history['val_loss'])), 
                         y=history.history['val_loss'], 
                         name='Validation loss',
                         line=dict(color='green', width=2, dash='dash')))
fig.show()

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(history.history['accuracy'])), 
                         y=history.history['accuracy'], 
                         name='Training accuracy',
                         line=dict(color='red', width=2, dash='dash')))
fig.add_trace(go.Scatter(x=np.arange(len(history.history['val_accuracy'])), 
                         y=history.history['val_accuracy'], 
                         name='Validation accuracy',
                         line=dict(color='orange', width=2, dash='dash')))
fig.show()

In [13]:
loss = model.evaluate(test_loader.load(), steps=test_loader.steps_per_epoch)
print('Test loss:', round(loss[0], 2))
print('Test accuracy:', round(loss[1], 2))

Test loss: 20.45
Test accuracy: 0.0



you are shuffling a 'TUDataset' object which is not a subclass of 'Sequence'; `shuffle` is not guaranteed to behave correctly. E.g., non-numpy array/tensor objects with view semantics may contain duplicates after shuffling.

