In [1]:
import keras.backend as K
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
from spektral.datasets import qm9
from spektral.layers import GraphConv, GlobalAvgPool
from spektral.layers.ops import sp_matrix_to_sp_tensor_value
from spektral.utils import Batch, batch_iterator
from spektral.utils import label_to_one_hot

In [3]:
qm9.EDGE_FEATURES

['type', 'stereo']

In [4]:
np.random.seed(0)
SW_KEY = 'dense_2_sample_weights:0'  # Keras automatically creates a placeholder for sample weights, which must be fed

In [5]:
# Load data
A, X, _, y = qm9.load_data(return_type='numpy',
                           nf_keys='atomic_num',
                           ef_keys='type',
                           self_loops=True,
                           auto_pad=False,
                           amount=2000)  # Set to None to train on whole dataset
y = y[['cv']].values  # Heat capacity at 298.15K

Loading QM9 dataset.
Reading SDF


100%|██████████| 2000/2000 [00:00<00:00, 5462.48it/s]


In [6]:
# Preprocessing
uniq_X = np.unique([v for x in X for v in np.unique(x)])
X = [label_to_one_hot(x, uniq_X) for x in X]
y = StandardScaler().fit_transform(y).reshape(-1, y.shape[-1])

In [7]:
# Parameters
F = X[0].shape[-1]    # Dimension of node features
n_out = y.shape[-1]   # Dimension of the target
learning_rate = 1e-3  # Learning rate
epochs = 25           # Number of training epochs
batch_size = 128       # Batch size

In [8]:
print(F)
print(n_out)

4
1


In [9]:
# Train/test split
A_train, A_test, \
X_train, X_test, \
y_train, y_test = train_test_split(A, X, y, test_size=0.1)

In [10]:
A_train.shape

(1800,)

In [11]:
A_test.shape

(200,)

In [12]:
len(X_train)

1800

In [13]:
len(y_train)

1800

In [14]:
# Model definition
X_in = Input(batch_shape=(None, F))
A_in = Input(batch_shape=(None, None), sparse=True)
I_in = Input(batch_shape=(None, ), dtype='int64')
target = Input(tensor=tf.placeholder(tf.float32, shape=(None, n_out), name='target'))

W1007 23:34:50.975373 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1007 23:34:50.986185 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1007 23:34:50.988109 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:539: The name tf.sparse_placeholder is deprecated. Please use tf.compat.v1.sparse_placeholder instead.



In [15]:
gc1 = GraphConv(128, activation='relu')([X_in, A_in])
gc2 = GraphConv(128, activation='relu')([gc1, A_in])
pool = GlobalAvgPool()([gc2, I_in])
dense1 = Dense(128, activation='relu')(pool)
output = Dense(n_out)(dense1)

W1007 23:34:50.996334 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W1007 23:34:51.005481 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:1133: The name tf.sparse_tensor_dense_matmul is deprecated. Please use tf.sparse.sparse_dense_matmul instead.



In [16]:
# Build model
model = Model(inputs=[X_in, A_in, I_in], outputs=output)
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer, loss='mse', target_tensors=target)
model.summary()

W1007 23:34:51.044615 4617369024 deprecation_wrapper.py:119] From /Users/em/anaconda3/envs/venv/lib/python3.7/site-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 4)            0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
graph_conv_1 (GraphConv)        (None, 128)          640         input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
graph_conv_2 (GraphConv)        (None, 128)          16512       graph_conv_1[0][0]         

In [17]:
# Training setup
sess = K.get_session()
loss = model.total_loss
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = opt.minimize(loss)

In [18]:
# Initialize all variables
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [19]:
batches_train = batch_iterator([A_train, X_train, y_train], batch_size=batch_size, epochs=epochs)
model_loss = 0
batch_index = 0
batches_in_epoch = np.ceil(len(A_train) / batch_size)

In [20]:
# Training loop
for b in batches_train:
    batch = Batch(b[0], b[1])
    X_, A_, I_ = batch.get('XAI')
    y_ = b[2]
    tr_feed_dict = {X_in: X_,
                    A_in: sp_matrix_to_sp_tensor_value(A_),
                    I_in: I_,
                    target: y_,
                    SW_KEY: np.ones((1,))}
    outs = sess.run([train_step, loss], feed_dict=tr_feed_dict)
    model_loss += outs[-1]

    batch_index += 1
    if batch_index == batches_in_epoch:
        print('Loss: {}'.format(model_loss / batches_in_epoch))
        model_loss = 0
        batch_index = 0

Loss: 0.9131667256355286
Loss: 0.8371170878410339
Loss: 0.8244766672452291
Loss: 0.8089590251445771
Loss: 0.7913940072059631
Loss: 0.8291654348373413
Loss: 0.7925759275754293
Loss: 0.7844207763671875
Loss: 0.7973969737688701
Loss: 0.8033288478851318
Loss: 0.761223316192627
Loss: 0.7833080410957336
Loss: 0.8697772105534871
Loss: 0.7430752694606781
Loss: 0.758231536547343
Loss: 0.7865867773691814
Loss: 0.7563559055328369
Loss: 0.7736682494481405
Loss: 0.7291035215059917
Loss: 0.729581892490387
Loss: 0.731050948301951
Loss: 0.7579889496167501
Loss: 0.7058567722638448
Loss: 0.6871962706247966
Loss: 0.6932330509026845


In [21]:
batches_test = batch_iterator([A_test, X_test, y_test], batch_size=batch_size)
model_loss = 0
batches_in_epoch = np.ceil(len(A_test) / batch_size)

In [22]:
# Test loop
for b in batches_test:
    batch = Batch(b[0], b[1])
    X_, A_, I_ = batch.get('XAI')
    y_ = b[2]
    tr_feed_dict = {X_in: X_,
                    A_in: sp_matrix_to_sp_tensor_value(A_),
                    I_in: I_,
                    target: y_,
                    SW_KEY: np.ones((1,))}
    model_loss += sess.run([loss], feed_dict=tr_feed_dict)[0]
print('---------------------------------------------')
print('Test loss: {}'.format(model_loss / batches_in_epoch))

Tensor("loss/mul:0", shape=(), dtype=float32)
Tensor("loss/mul:0", shape=(), dtype=float32)
---------------------------------------------
Test loss: 0.8726409077644348


In [23]:
print(preds)

NameError: name 'preds' is not defined