In [3]:
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import EarlyStopping, TensorBoard
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.backend import mean, square

from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAttentionPool
from spektral.utils import label_to_one_hot

from os import path

Using TensorFlow backend.


In [4]:
A_complete, X_complete, E_complete, y_complete = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
    amount=None # Set to None to train on whole dataset
)
# one-hot labeling of atoms
uniq_X = np.unique(X_complete)
X_complete = label_to_one_hot(X_complete, uniq_X)

Loading QM9 dataset.
Reading SDF


100%|██████████| 133885/133885 [00:38<00:00, 3471.19it/s]


In [3]:
A, X, E = list(), list(), list()
y = y_complete.sample(10000)
for index, row in y.iterrows():
    A.append(A_complete[index])
    X.append(X_complete[index])
    E.append(E_complete[index])
A = np.stack(A, axis=0)
X = np.stack(X, axis=0)
E = np.stack(E, axis=0)

In [4]:
tasks = list(y.columns)[1:]
num_tasks = len(tasks)
y_list = []
for task in tasks:
    y_list.append(y[[task]].values)
    
key_to_index = dict(zip(tasks, range(len(tasks))))
key_to_mean = dict()
key_to_std = dict()
for task in tasks:
    key_to_mean[task] = np.mean(y[[task]].values)
    key_to_std[task] = np.std(y[[task]].values)

# Transforms the output values to have mean 0 and variance 1
for i in range(len(y_list)):
    y_list[i] = StandardScaler().fit_transform(y_list[i]).reshape(-1, y_list[-1].shape[-1])

In [1]:
clusters = [['A', 'B', 'alpha'], 
            ['C', 'r2', 'u0'],
            ['zpve', 'g298', 'cv'],
            ['lumo', 'u298', 'h298'],
            ['mu', 'homo']]

In [6]:
N = X.shape[-2]           # Number of nodes in the graphs
F = X.shape[-1]           # Node features dimensionality
S = E.shape[-1]           # Edge features dimensionality
n_out = y_list[0].shape[-1]    # Dimensionality of the target
learning_rate = 1e-3      # Learning rate for SGD
epochs = 25               # Number of training epochs # formerly 25
batch_size = 64           # Batch size
es_patience = 5           # Patience fot early stopping
soft = False
soft_weight = 0.1

In [7]:
A_train, A_test, \
X_train, X_test, \
E_train, E_test, \
*y_train_test_list = train_test_split(A, X, E, *y_list, test_size = 0.1)

y_train_list = y_train_test_list[::2]
y_test_list = y_train_test_list[1::2]

In [8]:
X_in = Input(shape=(N, F))
A_in = Input(shape=(N, N))
E_in = Input(shape=(N, N, S))





In [9]:
def create_single_task_model():
    gc1 = EdgeConditionedConv(64, activation='relu')([X_in, A_in, E_in])
    gc2 = EdgeConditionedConv(128, activation='relu')([gc1, A_in, E_in])
    pool = GlobalAttentionPool(256)(gc2)
    dense = Dense(256, activation='relu')(pool)
    output = Dense(n_out)(dense)
    return Model(inputs=[X_in, A_in, E_in], outputs=output)

In [10]:
def create_hard_parameter_sharing_model(num_tasks=1):
    gc1 = EdgeConditionedConv(64, activation='relu')([X_in, A_in, E_in])
    gc2 = EdgeConditionedConv(128, activation='relu')([gc1, A_in, E_in])
    pool = GlobalAttentionPool(256)(gc2)
    dense_list = [Dense(256, activation='relu')(pool) for i in range(num_tasks)]
    output_list = [Dense(n_out)(dense_layer) for dense_layer in dense_list]
    return Model(inputs=[X_in, A_in, E_in], outputs=output_list)

In [11]:
def create_soft_paramter_sharing_model_and_loss(soft_weight, num_tasks=1):
    gc1_list = [EdgeConditionedConv(64, activation='relu')([X_in, A_in, E_in]) for i in range(num_tasks)]
    gc2_list = [EdgeConditionedConv(128, activation='relu')([gc1_layer, A_in, E_in]) for gc1_layer in gc1_list]
    pool_list = [GlobalAttentionPool(256)(gc2_layer) for gc2_layer in gc2_list]
    dense_list = [Dense(256, activation='relu')(pool_layer) for pool_layer in pool_list]
    output_list = [Dense(n_out)(dense_layer) for dense_layer in dense_list]
    model = Model(inputs=[X_in, A_in, E_in], outputs=output_list)
    
    def loss(y_true, y_pred):
        avg_layer_diff = 0
        for i in range(len(dense_list)):
            for j in range(i):
                avg_layer_diff += mean(square(dense_list[i]-dense_list[j]))
        avg_layer_diff /= len(dense_list)
        return mean(square(y_pred - y_true)) + soft_weight*avg_layer_diff
    
    return Model(inputs=[X_in, A_in, E_in], outputs=output_list), loss

In [14]:
def generate_model_filename(tasks):
    tasks_str = "".join(sorted(tasks))
    return path.join('demo_models', tasks_str + '.h5')

In [15]:
def generate_model_helper_filename(task):
    return path.join('demo_models', task + '.txt')

In [16]:
def train_and_save_multitask_model(tasks, y_train_list):
    model = create_hard_parameter_sharing_model(len(tasks))
    model.compile(optimizer=Adam(lr=learning_rate), loss='mse')
    es_callback = EarlyStopping(monitor='val_loss', patience=es_patience)
    training_set = [y_train_list[key_to_index[task]] for task in tasks]
    model.fit([X_train, A_train, E_train],
             training_set,
             batch_size=batch_size,
             validation_split=0.1,
             epochs=epochs,
             callbacks=[es_callback])
    model.save_weights(generate_filename(tasks))
    for task in tasks:
        helper_file = generate_helper_filename(task)
        with open(helper_file, 'w') as file:
            print(key_to_mean[task], file=file)
            print(key_to_std[task], file=file)

In [17]:
def load_and_evaluate_multitask_model(tasks, y_test_list):
    model = create_hard_parameter_sharing_model(len(tasks))
    model.load_weights(generate_model_filename(tasks))
    model.compile(optimizer=Adam(lr=learning_rate), loss='mse')
    testing_set = [y_test_list[key_to_index[task]] for task in tasks]
    eval_results = model.evaluate([X_test, A_test, E_test], testing_set, batch_size=batch_size)
    return eval_results

In [18]:
def calculate_property(prop, mol_id):
    for cluster in clusters:
        if prop in cluster:
            model = create_hard_parameter_sharing_model(len(cluster))
            model.load_weights(generate_model_filename(cluster))
            model.compile(optimizer=Adam(lr=learning_rate), loss='mse')
            predictions = model.predict([[X_complete[mol_id-1]], [A_complete[mol_id-1]], [E_complete[mol_id-1]]])
            mean, std = 0, 1
            with open(generate_helper_filename(prop), 'r') as f:
                lines = f.readlines()
                mean = float(lines[0].strip())
                std = float(lines[1].strip())
            prediction = mean + std * predictions[1 + cluster.index(prop)]
            return prediction[0][0]

In [19]:
if __name__ == '__main__' and '__file__' not in globals():
    for cluster in clusters:
        train_and_save_multitask_model(cluster, y_train_list)

In [20]:
if __name__ == '__main__' and '__file__' not in globals():
    print(load_and_evaluate_multitask_model(['A', 'B', 'alpha'], y_test_list))





[0.7527519102096558, 0.42298720276355745, 0.271389740228653, 0.05837496376037598]


In [21]:
if __name__ == '__main__' and '__file__' not in globals():
    print(calculate_property('A', 13333))
    print(y_complete.loc[13333 - 1, 'A'])

4.059586
5.20531


In [None]:
# optimizer = Adam(lr=learning_rate)
# if soft:
#     model, loss = create_soft_paramter_sharing_model_and_loss(X_in, A_in, E_in, soft_weight)
#     model.compile(optimizer=optimizer, loss=loss)
# else:
#     model = create_hard_paramter_sharing_model(X_in, A_in, E_in)
#     model.compile(optimizer=optimizer, loss='mse')
# model.summary()

In [None]:
# log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# es_callback = EarlyStopping(monitor='val_loss', patience=es_patience)

# model.fit([X_train, A_train, E_train],
#           y_train_list,
#           batch_size=batch_size,
#           validation_split=0.1,
#           epochs=epochs,
#           callbacks=[es_callback])

In [None]:
# print('Evaluating model.')
# eval_results = model.evaluate([X_test, A_test, E_test],
#                               y_test_list,
#                               batch_size=batch_size)
# print('Done.\n'
#       'Test loss: {}'.format(eval_results))

In [None]:
# preds = model.predict([X_test, A_test, E_test])

# if num_tasks == 1:
#     preds = np.transpose(preds)

# for i in range(num_tasks):
#     plt.figure()
#     plt.scatter(preds[i], y_test_list[i], alpha=0.3)
#     plt.plot()
#     plt.title(tasks[i])
#     plt.xlabel('Predicted')
#     plt.ylabel('Actual')
#     # plt.savefig('graphs/' + '11_5_'+tasks[i]+'_multitask')