QM9 dataset loader (Spektral source): https://github.com/danielegrattarola/spektral/blob/master/spektral/datasets/qm9.py  
Reference graph-regression example (Spektral): https://github.com/danielegrattarola/spektral/blob/master/examples/graph_prediction/qm9_batch.py

In [None]:
!pip install spektral

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalSumPool
from spektral.utils import label_to_one_hot


In [None]:
################################################################################
# PARAMETERS
################################################################################
learning_rate = 1e-3  # Learning rate
epochs = 10           # Number of training epochs
batch_size = 32       # Batch size
seed = 0              # Seed for the NumPy and TensorFlow global random generators

# Seed the global RNGs up front so that a fresh "Restart & Run All" starts from
# the same random state (as far as the backend allows).
np.random.seed(seed)
tf.random.set_seed(seed)

################################################################################
# LOAD DATA
################################################################################
# Node features requested: atomic number, charge and coordinates.
# Edge features requested: the 'type' key.
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys=['atomic_num', 'charge', 'coords'],
    ef_keys='type',
    self_loops=True,
    amount=1000,  # Set to None to train on whole dataset
)
# Keep only the 'cv' column as a 2-D array of targets.
y = y[['cv']].values  # Heat capacity at 298.15K

Loading QM9 dataset.
Reading SDF


In [None]:
# Show the shape of each of the four loaded arrays, one per line.
print(*(arr.shape for arr in (A, X, E, y)), sep='\n')

(1000, 8, 8)
(1000, 8, 5)
(1000, 8, 8, 1)
(1000, 1)


In [None]:
# Peek at the node-feature matrix of the graph at index 5 (one row per node).
# NOTE(review): the all-zero rows presumably pad the graph up to the common
# node count — confirm against the loader.
X[5]

array([[ 6.0000e+00,  0.0000e+00, -1.4000e-02,  1.1802e+00,  7.8000e-03],
       [ 8.0000e+00,  0.0000e+00,  2.3000e-03, -1.9700e-02,  2.2000e-03],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [None]:
# Preprocessing
# Distinct values of X and of E, with zero dropped from each set.
X_uniq, E_uniq = (
    distinct[np.flatnonzero(distinct)]
    for distinct in map(np.unique, (X, E))
)

# Node features are left as loaded (X_uniq is not consumed in this cell);
# only the edge features are one-hot encoded, using E_uniq as the label set.
E = label_to_one_hot(E, E_uniq)

# Parameters
N, F = X.shape[-2:]   # Number of nodes in the graphs, dimension of node features
S = E.shape[-1]       # Dimension of edge features
n_out = y.shape[-1]   # Dimension of the target

# Train/test split (10% held out, fixed random_state)
(A_train, A_test,
 X_train, X_test,
 E_train, E_test,
 y_train, y_test) = train_test_split(A, X, E, y, test_size=0.1, random_state=0)

In [None]:
# Model inputs: node features (N, F), an (N, N) matrix per graph, and edge
# features (N, N, S).
X_in = Input(shape=(N, F))
A_in = Input(shape=(N, N))
E_in = Input(shape=(N, N, S))

# Two stacked edge-conditioned convolutions with 32 channels each, followed by
# a global sum pooling layer and a Dense head with n_out units.
X_1 = EdgeConditionedConv(32, activation='relu')([X_in, A_in, E_in])
X_2 = EdgeConditionedConv(32, activation='relu')([X_1, A_in, E_in])
X_3 = GlobalSumPool()(X_2)
output = Dense(n_out)(X_3)

# Build model
model = Model(inputs=[X_in, A_in, E_in], outputs=output)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras optimizers.
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='mse')
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 8, 5)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 8, 8)]       0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 8, 8, 3)]    0                                            
__________________________________________________________________________________________________
edge_conditioned_conv (EdgeCond (None, 8, 32)        832         input_1[0][0]                    
                                                                 input_2[0][0]         

In [None]:
################################################################################
# FIT MODEL
################################################################################
# Inputs are passed in the same order as the model's Input layers: X, A, E.
model.fit(x=[X_train, A_train, E_train],
          y=y_train,
          epochs=epochs,
          batch_size=batch_size)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f3da6cb6fd0>

In [None]:
################################################################################
# EVALUATE MODEL
################################################################################
print('Testing model')
# Held-out inputs, in the same X, A, E order used to build the model.
test_inputs = [X_test, A_test, E_test]
model_loss = model.evaluate(x=test_inputs, y=y_test, batch_size=batch_size)
print('Done. Test loss: {}'.format(model_loss))

Testing model
Done. Test loss: 4.4188737869262695
