In [1]:
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense
from keras.models import Model, Sequential
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.backend import mean, square

from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAttentionPool
from spektral.utils import label_to_one_hot

Using TensorFlow backend.


In [2]:
# Load a 500-molecule subset of QM9 as numpy arrays. Node features come from
# the atomic number, edge features from the bond type, and self-loops are
# requested from the loader. `y` is the table of molecular properties.
# (A is presumably the adjacency tensor — it feeds the (N, N) model input.)
A, X, E, y = qm9.load_data(return_type='numpy',
                          nf_keys='atomic_num',
                          ef_keys='type',
                          self_loops=True,
                          amount=500)
# Replace the raw node labels by one-hot vectors over the distinct values
# that actually occur in X.
uniq_X = np.unique(X)
X = label_to_one_hot(X, uniq_X)

Loading QM9 dataset.
Reading SDF


100%|██████████| 500/500 [00:00<00:00, 2826.41it/s]


In [3]:
# Display the target table: one row per molecule, one column per property.
y

Unnamed: 0,mol_id,A,B,C,mu,alpha,homo,lumo,gap,r2,zpve,u0,u298,h298,g298,cv,u0_atom,u298_atom,h298_atom,g298_atom
0,gdb_1,157.71180,157.709970,157.706990,0.0000,13.21,-0.3877,0.1171,0.5048,35.3641,0.044749,-40.478930,-40.476062,-40.475117,-40.498597,6.469,-395.999595,-398.643290,-401.014647,-372.471772
1,gdb_2,293.60975,293.541110,191.393970,1.6256,9.46,-0.2570,0.0829,0.3399,26.1563,0.034358,-56.525887,-56.523026,-56.522082,-56.544961,6.316,-276.861363,-278.620271,-280.399259,-259.338802
2,gdb_3,799.58812,437.903860,282.945450,1.8511,6.31,-0.2928,0.0687,0.3615,19.0002,0.021375,-76.404702,-76.401867,-76.400922,-76.422349,6.002,-213.087624,-213.974294,-215.159658,-201.407171
3,gdb_4,0.00000,35.610036,35.610036,0.0000,16.28,-0.2845,0.0506,0.3351,59.5248,0.026841,-77.308427,-77.305527,-77.304583,-77.327429,8.574,-385.501997,-387.237686,-389.016047,-365.800724
4,gdb_5,0.00000,44.593883,44.593883,2.8937,12.99,-0.3604,0.0191,0.3796,48.7476,0.016601,-93.411888,-93.409370,-93.408425,-93.431246,6.278,-301.820534,-302.906752,-304.091489,-288.720028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,gdb_496,20.93178,1.400360,1.363780,2.4084,48.12,-0.2740,-0.0464,0.2276,792.7020,0.083388,-285.313674,-285.307112,-285.306168,-285.344659,21.004,-1114.946069,-1120.604945,-1126.531767,-1045.635189
496,gdb_497,12.81085,1.748270,1.688220,3.3746,49.89,-0.2561,0.0191,0.2752,691.5345,0.083614,-285.276634,-285.270024,-285.269079,-285.307545,22.134,-1091.703135,-1097.331891,-1103.258086,-1022.345820
497,gdb_498,11.45832,1.833160,1.769770,3.6197,44.97,-0.2788,-0.0017,0.2772,652.2200,0.073313,-301.382062,-301.375865,-301.374920,-301.412562,20.073,-1009.255983,-1014.254719,-1019.587918,-946.018136
498,gdb_499,10.36100,1.925430,1.714020,3.7921,45.77,-0.2819,-0.0051,0.2768,660.5683,0.070779,-305.145002,-305.138580,-305.137636,-305.175765,20.964,-1021.373809,-1026.231984,-1031.565810,-958.661187


In [4]:
# Source task (trained first) and target task (receives the transferred weights).
task_learned, task_transferred = 'zpve', 'h298_atom'


def _standardize_target(targets, task):
    """Standardize a single target column.

    Parameters
    ----------
    targets : table indexable as ``targets[[task]]`` and exposing ``.values``
        The property table returned by the QM9 loader.
    task : str
        Name of the column to extract.

    Returns
    -------
    (scaled, scaler)
        ``scaled`` is the zero-mean / unit-variance column as an ``(n, 1)``
        array; ``scaler`` is the fitted ``StandardScaler`` so predictions can
        later be mapped back with ``scaler.inverse_transform``.
    """
    scaler = StandardScaler()
    # `targets[[task]].values` is already 2-D with one column, which is the
    # shape fit_transform returns, so no reshape is needed.
    return scaler.fit_transform(targets[[task]].values), scaler


# NOTE(review): the scalers are fitted on all rows before the train/test split,
# so test-set statistics leak into the scaling — confirm this is acceptable.
y_learned, scaler_learned = _standardize_target(y, task_learned)
y_transferred, scaler_transferred = _standardize_target(y, task_transferred)

In [5]:
# --- Tensor dimensions, read from the loaded arrays ---
# N and F are the last two axes of X; they size the (N, F), (N, N) and
# (N, N, S) model inputs.
N, F = X.shape[-2:]
S = E.shape[-1]               # size of the last axis of the edge features
n_out = y_learned.shape[-1]   # width of the regression output

# --- Training hyper-parameters ---
# NOTE(review): learning_rate is not referenced by any visible cell; the
# models are compiled with the string 'adam'.
learning_rate = 1e-3
# NOTE(review): es_patience (5) is larger than epochs (3), so early stopping
# presumably never triggers in these runs.
epochs = 3
batch_size = 64
es_patience = 5

In [6]:
# Hold out 10% of the molecules for testing. All five arrays go through one
# call so that their rows stay aligned after shuffling.
(A_train, A_test,
 X_train, X_test,
 E_train, E_test,
 y_learned_train, y_learned_test,
 y_transferred_train, y_transferred_test) = train_test_split(
    A, X, E, y_learned, y_transferred,
    test_size=0.1,
    random_state=0,  # fixed seed: the split is identical on every Restart & Run All
)

In [22]:
# Module-level Keras input placeholders for node features, adjacency and edge
# features.
# NOTE(review): no visible cell references these three names — create_model()
# builds its own inputs with the same shapes. Confirm whether this cell is a
# leftover before removing it.
X_in = Input(shape=(N, F))
A_in = Input(shape=(N, N))
E_in = Input(shape=(N, N, S))

In [13]:
def create_model():
    """Build a fresh, uncompiled graph-regression network.

    The architecture is: two edge-conditioned graph convolutions (64 and 128
    channels, ReLU), a global attention pooling layer with 256 channels, a
    256-unit ReLU dense layer and a linear dense output of width ``n_out``.

    Input shapes are taken from the notebook-level ``N``, ``F`` and ``S``.

    Returns
    -------
    Model
        A Keras model taking ``[node_features, adjacency, edge_features]``.
    """
    node_features = Input(shape=(N, F))
    adjacency = Input(shape=(N, N))
    edge_features = Input(shape=(N, N, S))

    # Message passing: both convolutions see the same adjacency and edge features.
    hidden = EdgeConditionedConv(64, activation='relu')(
        [node_features, adjacency, edge_features])
    hidden = EdgeConditionedConv(128, activation='relu')(
        [hidden, adjacency, edge_features])

    # Collapse the node axis, then regress.
    hidden = GlobalAttentionPool(256)(hidden)
    hidden = Dense(256, activation='relu')(hidden)
    prediction = Dense(n_out)(hidden)

    return Model(inputs=[node_features, adjacency, edge_features],
                 outputs=prediction)

In [30]:
def train_and_save_model(model, X_train, A_train, E_train, y_train, y_test, filename):
    """Compile ``model``, fit it on the training graphs and save it as HDF5.

    Parameters
    ----------
    model : keras Model
        Network taking ``[X, A, E]`` as inputs; it is (re)compiled here with
        the Adam optimizer and MSE loss.
    X_train, A_train, E_train : arrays
        Node features, adjacency and edge features of the training graphs.
    y_train : array
        Training targets.
    y_test : array
        Currently unused; kept so existing callers keep working.
    filename : str
        Base name of the saved model. ``'.h5'`` is appended unless the name
        already ends with it.

    Notes
    -----
    Training uses the notebook-level ``batch_size``, ``epochs`` and
    ``es_patience`` and holds out 10% of the training data for validation.
    The model is written to ``single_task_trained_models/<filename>.h5``;
    the directory is created if it does not exist.
    """
    # Local import: the notebook's import cell never brings `os`/`path` into scope.
    import os

    model.compile(optimizer='adam', loss='mse')
    es_callback = EarlyStopping(monitor='val_loss', patience=es_patience)
    model.fit([X_train, A_train, E_train],
              y_train,
              batch_size=batch_size,
              validation_split=0.1,
              epochs=epochs,
              callbacks=[es_callback])

    output_dir = 'single_task_trained_models'
    os.makedirs(output_dir, exist_ok=True)
    # The extension belongs to the file name; passing it as a separate path
    # component would produce '<dir>/<filename>/.h5'.
    if not filename.endswith('.h5'):
        filename += '.h5'
    model.save(os.path.join(output_dir, filename))

In [15]:
# Network for the source task: build it, compile it for MSE regression with
# Adam, then print the layer table.
learned_model = create_model()
learned_model.compile(optimizer='adam', loss='mse')
learned_model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 6, 5)         0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            (None, 6, 6)         0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 6, 6, 1)      0                                            
__________________________________________________________________________________________________
edge_conditioned_conv_1 (EdgeCo (None, 6, 64)        704         input_4[0][0]                    
                                                                 input_5[0][0]                    
         

In [16]:
# Network for the target task: same architecture and compile settings as the
# source-task model, but an independent set of weights.
transferred_model = create_model()
transferred_model.compile(optimizer='adam', loss='mse')
transferred_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 6, 5)         0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (None, 6, 6)         0                                            
__________________________________________________________________________________________________
input_9 (InputLayer)            (None, 6, 6, 1)      0                                            
__________________________________________________________________________________________________
edge_conditioned_conv_3 (EdgeCo (None, 6, 64)        704         input_7[0][0]                    
                                                                 input_8[0][0]                    
          

In [17]:
# Early-stopping callback watching the validation loss; it is passed to both
# fit() calls in the following cells.
es_callback = EarlyStopping(monitor='val_loss', patience=es_patience)

In [18]:
# Train the source-task model on the standardized `task_learned` targets.
# validation_split=0.1 makes Keras set aside 10% of the training rows for
# the val_loss that the early-stopping callback monitors.
learned_model.fit([X_train, A_train, E_train],
                          y_learned_train,
                          batch_size=batch_size,
                          validation_split=0.1,
                          epochs=epochs,
                          callbacks=[es_callback])



Train on 405 samples, validate on 45 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fa374a13ac8>

In [19]:
# Train the target-task model from scratch with the same settings, to get a
# baseline before any weights are copied over.
# NOTE(review): this reuses the es_callback instance from the previous fit —
# confirm the callback resets its internal state at the start of training.
transferred_model.fit([X_train, A_train, E_train],
                              y_transferred_train,
                              batch_size=batch_size,
                              validation_split=0.1,
                              epochs=epochs,
                              callbacks=[es_callback])

Train on 405 samples, validate on 45 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fa3746f66d8>

In [20]:
# Baseline: MSE of the independently trained target-task model on the
# held-out split (targets are standardized, so the loss is in scaled units).
eval_results = transferred_model.evaluate([X_test, A_test, E_test],
                              y_transferred_test,
                              batch_size=batch_size)
print('Done.\n'
      'Test loss: {}'.format(eval_results))

Done.
Test loss: 0.816130518913269


In [23]:
# Despite the name, this is the flat list of weight arrays of the source-task
# model, not a list of layer objects.
learned_layers = learned_model.get_weights()
# Print every array's shape to locate the boundary between the part of the
# network to be transferred and the dense head.
for layer in learned_layers:
    print(layer.shape)
# Index 10 is where the transfer cell splits the list; show that array's shape.
learned_layers[10].shape

(64,)
(1, 320)
(320,)
(128,)
(1, 8192)
(8192,)
(128, 256)
(256,)
(128, 256)
(256,)
(256, 256)
(256,)
(256, 1)
(1,)


(256, 256)

In [26]:
# Weight transfer: take the first 10 weight arrays from the source-task model
# and keep the remaining arrays (index 10 onwards) from the target-task model.
# NOTE(review): 10 is a hand-picked index tied to this architecture — judging
# by the printed shapes it is presumably the first array of the dense head.
# It must be updated if create_model() changes.
transferred_layers = learned_layers[:10] + transferred_model.get_weights()[10:]
for layer in transferred_layers:
    print(layer.shape)
transferred_model.set_weights(transferred_layers)

(64,)
(1, 320)
(320,)
(128,)
(1, 8192)
(8192,)
(128, 256)
(256,)
(128, 256)
(256,)
(256, 256)
(256,)
(256, 1)
(1,)


In [32]:
# Re-score the target-task model after its first 10 weight arrays were
# overwritten with those of the source-task model. No fine-tuning happens
# between the weight copy and this evaluation in the visible cells.
eval_results = transferred_model.evaluate([X_test, A_test, E_test],
                              y_transferred_test,
                              batch_size=batch_size)
print('Done.\n'
      'Test loss: {}'.format(eval_results))

Done.
Test loss: 0.8942722082138062

0.8942722082138062
