In [None]:
import pickle
from os import path
from time import time
import itertools

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.backend import mean, square

from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalSumPool, GlobalAttentionPool
from spektral.utils import label_to_one_hot

from sklearn.preprocessing import StandardScaler, PowerTransformer

In [1]:
def load_data(amount=None):
    """Load QM9 via spektral and one-hot encode its node and edge labels.

    Args:
        amount: forwarded unchanged to ``qm9.load_data``; ``None`` asks for
            the entire dataset.

    Returns:
        A tuple ``(A_all, X_all, E_all, y_all)``. ``A_all`` and ``y_all`` are
        passed through exactly as the loader returned them; ``X_all`` and
        ``E_all`` are the loader's node / edge labels after
        ``label_to_one_hot``.
    """
    def nonzero_labels(values):
        # Distinct values present in `values`, with 0 excluded
        # (presumably 0 marks padding / "no edge" -- confirm with spektral).
        distinct = np.unique(values)
        return distinct[distinct != 0]

    loader_options = dict(return_type='numpy',
                          nf_keys='atomic_num',
                          ef_keys='type',
                          self_loops=True,
                          amount=amount)  # None for entire dataset
    A_all, node_labels, edge_labels, y_all = qm9.load_data(**loader_options)

    # Preprocessing: work out both label vocabularies before encoding.
    node_vocab = nonzero_labels(node_labels)
    edge_vocab = nonzero_labels(edge_labels)

    X_all = label_to_one_hot(node_labels, node_vocab)
    E_all = label_to_one_hot(edge_labels, edge_vocab)

    return A_all, X_all, E_all, y_all

In [None]:
def sample_from_data(sample_size, A_all, X_all, E_all, y_all):
    """Draw a random subset of graphs, without replacement.

    The same randomly chosen positions (along the first axis of ``X_all``)
    are used to slice all four inputs, so the returned pieces stay aligned.

    Args:
        sample_size: number of graphs to draw.
        A_all, X_all: arrays indexed as ``[graph, :, :]``.
        E_all: array indexed as ``[graph, :, :, :]``.
        y_all: object supporting ``.iloc[rows, :]`` (e.g. a DataFrame).

    Returns:
        ``(A, X, E, y)`` where ``y`` is an independent copy of the selected
        rows, so modifying it does not touch ``y_all``.
    """
    n_available = X_all.shape[0]
    chosen = np.random.choice(n_available, sample_size, replace=False)

    return (
        A_all[chosen, :, :],
        X_all[chosen, :, :],
        E_all[chosen, :, :, :],
        y_all.iloc[chosen, :].copy(),
    )

In [None]:
def standardize(y):
    """Transform every column of ``y`` except the first, in place.

    A separate ``PowerTransformer`` is fitted per column and its output is
    written back into ``y``. The first column is skipped (presumably an
    identifier column -- confirm against the dataset).

    Args:
        y: DataFrame of targets; it is modified in place.

    Returns:
        dict mapping each transformed column name to its fitted transformer,
        which can later be used for ``inverse_transform``.
    """
    fitted = {}
    for column in y.columns[1:]:
        transformer = PowerTransformer()
        # Double brackets keep the input two-dimensional for the transformer.
        y.loc[:, column] = transformer.fit_transform(y[[column]])
        fitted[column] = transformer
    return fitted

In [None]:
def get_shape_params(*, A, X, E):
    """Read the graph dimensions off the node and edge feature arrays.

    Args:
        A: accepted for a uniform keyword interface; not inspected here.
        X: node features; its second-to-last axis is the node count.
        E: edge features; only its first element is inspected.

    Returns:
        ``(N, F, S)``: number of nodes per graph, node-feature dimension and
        edge-feature dimension.
    """
    node_count = X.shape[-2]
    first_node_features, first_edge_features = X[0], E[0]
    node_feature_dim = first_node_features.shape[-1]
    edge_feature_dim = first_edge_features.shape[-1]

    return node_count, node_feature_dim, edge_feature_dim

In [None]:
def get_input_tensors(*, A, X, E):
    """Create the three Keras ``Input`` placeholders for a batch of graphs.

    Shapes come from ``get_shape_params``: node features ``(N, F)``,
    adjacency ``(N, N)`` and edge features ``(N, N, S)``.

    Returns:
        ``(X_in, A_in, E_in)`` -- note the node-feature input comes first.
    """
    n_nodes, n_node_feats, n_edge_feats = get_shape_params(A=A, X=X, E=E)

    node_input = Input(shape=(n_nodes, n_node_feats))
    adjacency_input = Input(shape=(n_nodes, n_nodes))
    edge_input = Input(shape=(n_nodes, n_nodes, n_edge_feats))

    return node_input, adjacency_input, edge_input

In [None]:
def build_single_task_model(*, A, X, E, learning_rate=1e-3, loss='mse'):
    """Build and compile a graph network that predicts one scalar per graph.

    Architecture: two ``EdgeConditionedConv`` layers (64 then 128 channels,
    ReLU), a ``GlobalAttentionPool(256)`` readout, a ``Dense(256)`` ReLU layer
    and a final ``Dense(1)`` output.

    Args:
        A, X, E: example adjacency / node / edge arrays; only used to derive
            the input shapes via ``get_input_tensors``.
        learning_rate: learning rate handed to the Adam optimizer.
        loss: loss passed to ``model.compile``.

    Returns:
        The compiled Keras ``Model`` taking inputs ``[X, A, E]``.
    """
    X_in, A_in, E_in = get_input_tensors(A=A, X=X, E=E)

    gc1 = EdgeConditionedConv(64, activation='relu')([X_in, A_in, E_in])
    gc2 = EdgeConditionedConv(128, activation='relu')([gc1, A_in, E_in])
    pool = GlobalAttentionPool(256)(gc2)
    dense = Dense(256, activation='relu')(pool)
    output = Dense(1)(dense)

    # Build model
    model = Model(inputs=[X_in, A_in, E_in], outputs=output)
    # Use the `learning_rate` keyword: `lr` is a deprecated alias that newer
    # Keras releases no longer accept.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=loss)

    return model

In [None]:
def build_hard_sharing_model(*, A, X, E, num_tasks, 
                             learning_rate=1e-3, loss='mse'):
    """Build and compile a multi-task model with a shared graph trunk.

    The two ``EdgeConditionedConv`` layers and the ``GlobalAttentionPool``
    readout are shared by all tasks (hard parameter sharing); each task then
    gets its own ``Dense(256)`` ReLU layer followed by a ``Dense(1)`` output.

    Args:
        A, X, E: example adjacency / node / edge arrays; only used to derive
            the input shapes via ``get_input_tensors``.
        num_tasks: number of task-specific heads (and model outputs).
        learning_rate: learning rate handed to the Adam optimizer.
        loss: loss passed to ``model.compile``.

    Returns:
        The compiled Keras ``Model`` with inputs ``[X, A, E]`` and a list of
        ``num_tasks`` outputs, in head-creation order.
    """
    X_in, A_in, E_in = get_input_tensors(A=A, X=X, E=E)

    # Shared trunk.
    gc1 = EdgeConditionedConv(64, activation='relu')([X_in, A_in, E_in])
    gc2 = EdgeConditionedConv(128, activation='relu')([gc1, A_in, E_in])
    pool = GlobalAttentionPool(256)(gc2)

    # One independent head per task, all reading the same pooled embedding.
    dense_list = [Dense(256, activation='relu')(pool) 
                  for _ in range(num_tasks)]
    output_list = [Dense(1)(dense_layer) for dense_layer in dense_list]

    model = Model(inputs=[X_in, A_in, E_in], outputs=output_list)
    # Use the `learning_rate` keyword: `lr` is a deprecated alias that newer
    # Keras releases no longer accept.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=loss)

    return model

In [None]:
def build_soft_sharing_model(*, A, X, E, num_tasks, share_param, 
                             learning_rate=1e-3, loss='mse'):
    """Build and compile a multi-task model with soft parameter sharing.

    Every task gets its own full tower (two ``EdgeConditionedConv`` layers,
    ``GlobalAttentionPool``, ``Dense(256)``, ``Dense(1)``). The towers are
    tied together by a penalty added to the loss: the mean squared difference
    between corresponding graph-convolution weights, averaged over all task
    pairs and scaled by ``share_param``.

    Args:
        A, X, E: example adjacency / node / edge arrays; only used to derive
            the input shapes via ``get_input_tensors``.
        num_tasks: number of task towers (and model outputs).
        share_param: multiplier of the weight-difference penalty.
        learning_rate: learning rate handed to the Adam optimizer.
        loss: base prediction loss. ``'mse'`` (default) uses
            ``mean(square(y_actual - y_pred))``; a callable is used as is;
            any other identifier is resolved with ``keras.losses.get``.

    Returns:
        The compiled Keras ``Model`` with inputs ``[X, A, E]`` and a list of
        ``num_tasks`` outputs.
    """
    X_in, A_in, E_in = get_input_tensors(A=A, X=X, E=E)

    # Keep the layer objects around: the penalty needs their weights, which
    # are attributes of the layers, not of the tensors the layers return.
    gc1_layers = [EdgeConditionedConv(64, activation='relu') 
                  for _ in range(num_tasks)]
    gc1_list = [layer([X_in, A_in, E_in]) for layer in gc1_layers]
    gc2_layers = [EdgeConditionedConv(128, activation='relu') 
                  for _ in range(num_tasks)]
    gc2_list = [layer([gc1, A_in, E_in]) 
                for layer, gc1 in zip(gc2_layers, gc1_list)]
    pool_list = [GlobalAttentionPool(256)(gc2) for gc2 in gc2_list]
    dense_list = [Dense(256, activation='relu')(pool) for pool in pool_list]
    output_list = [Dense(1)(dense) for dense in dense_list]

    # Resolve the base loss without shadowing the `loss` argument.
    if callable(loss):
        base_loss = loss
    elif loss == 'mse':
        def base_loss(y_actual, y_pred):
            return mean(square(y_actual - y_pred))
    else:
        from tensorflow.keras.losses import get as get_loss
        base_loss = get_loss(loss)

    task_pairs = list(itertools.combinations(range(num_tasks), 2))

    def soft_sharing_loss(y_actual, y_pred):
        # NOTE(review): Keras presumably applies this callable once per model
        # output, so the penalty may be counted num_tasks times -- confirm.
        avg_layer_diff = 0
        for i, j in task_pairs:
            for layers in (gc1_layers, gc2_layers):
                # Compare weight tensors one by one; the weight lists
                # themselves cannot be subtracted.
                for w_i, w_j in zip(layers[i].trainable_weights,
                                    layers[j].trainable_weights):
                    avg_layer_diff += mean(square(w_i - w_j))
        if task_pairs:  # no pairs (single task) -> no penalty, no 0-division
            avg_layer_diff /= len(task_pairs)
        return base_loss(y_actual, y_pred) + share_param*avg_layer_diff

    model = Model(inputs=[X_in, A_in, E_in], outputs=output_list)
    # Use the `learning_rate` keyword: `lr` is a deprecated alias that newer
    # Keras releases no longer accept.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=soft_sharing_loss)

    return model

In [None]:
def generate_model_filename(tasks, folder_path='demo_models'):
    """Return the weights-file path for a model trained on ``tasks``.

    The file stem is the task names sorted and concatenated with no
    separator, so the result does not depend on the order of ``tasks``.
    The extension is ``.h5``.
    """
    ordered_tasks = list(tasks)
    ordered_tasks.sort()
    stem = "".join(ordered_tasks)
    return path.join(folder_path, stem + '.h5')

def generate_task_scaler_filename(task, folder_path='demo_models'):
    """Return the pickle path used to store the fitted scaler of ``task``."""
    basename = '{}_scaler.pkl'.format(task)
    return path.join(folder_path, basename)

In [None]:
def save_model(model, tasks, task_to_scaler):
    """Persist a model's weights and the fitted scaler of each of its tasks.

    Weights go to ``generate_model_filename(tasks)``; each scaler is pickled
    to ``generate_task_scaler_filename(task)``. Missing target directories
    are created.

    Args:
        model: object exposing ``save_weights(filename)``.
        tasks: iterable of task names the model was trained on.
        task_to_scaler: mapping from task name to its fitted scaler; it must
            contain every entry of ``tasks``.

    Raises:
        KeyError: if a task has no scaler. This is raised before anything is
            written, so no file is created or truncated in that case.
    """
    from os import makedirs

    def ensure_parent_dir(filename):
        # Create the folder that will hold `filename`, if there is one.
        parent = path.dirname(filename)
        if parent:
            makedirs(parent, exist_ok=True)

    tasks = list(tasks)
    # Look every scaler up first: opening a file with 'wb' truncates it, so a
    # lookup failure after opening would leave an empty pickle behind.
    scalers = [(task, task_to_scaler[task]) for task in tasks]

    weights_filename = generate_model_filename(tasks)
    ensure_parent_dir(weights_filename)
    model.save_weights(weights_filename)

    for task, scaler in scalers:
        scaler_filename = generate_task_scaler_filename(task)
        ensure_parent_dir(scaler_filename)
        with open(scaler_filename, 'wb') as f:
            pickle.dump(obj=scaler, file=f)

def load_hard_sharing_model(*, A, X, E, tasks, task_to_scaler=None):
    """Rebuild a hard-sharing model and restore its weights and scalers.

    Args:
        A, X, E: example arrays used by ``build_hard_sharing_model`` to size
            the inputs.
        tasks: task names of the saved model; they determine both the number
            of outputs and the weights file name.
        task_to_scaler: optional mapping of already-loaded scalers. Tasks
            missing from it are read from disk and added to it in place.
            When omitted, a fresh dict is created for this call.

    Returns:
        ``(model, task_to_scaler)``.
    """
    # A `dict()` default would be created once and shared by every call,
    # leaking scalers from one call into the next.
    if task_to_scaler is None:
        task_to_scaler = {}

    model = build_hard_sharing_model(A=A, X=X, E=E, num_tasks=len(tasks))
    model.load_weights(generate_model_filename(tasks))
    for task in tasks:
        if task not in task_to_scaler:
            # NOTE: unpickling can execute arbitrary code; only load scaler
            # files that this project wrote itself.
            with open(generate_task_scaler_filename(task), 'rb') as f:
                task_to_scaler[task] = pickle.load(f)
    return model, task_to_scaler

In [None]:
def predict_property(prop, mol_id, clusters, *, X_all, A_all, E_all, 
                     model=None, task_to_scaler=None):
    """Predict one property of one molecule, in the property's original scale.

    Args:
        prop: name of the property to predict.
        mol_id: 1-based position of the molecule in ``X_all`` / ``A_all`` /
            ``E_all``.
        clusters: list of task clusters (lists of property names); the first
            cluster containing ``prop`` selects the model.
        X_all, A_all, E_all: node, adjacency and edge arrays of all molecules.
        model: model for that cluster, with one output per cluster task in
            cluster order. When ``None`` it is loaded from disk together with
            any missing scalers.
        task_to_scaler: optional mapping from property name to fitted scaler.
            When ``model`` is supplied it must already contain ``prop``.

    Returns:
        The scalar prediction after ``inverse_transform`` of the property's
        scaler.

    Raises:
        IndexError: if no cluster contains ``prop`` or ``mol_id`` is outside
            ``1..len(X_all)``.
        KeyError: if no scaler is available for ``prop``.
    """
    # A `dict()` default would be shared between calls; start fresh instead.
    if task_to_scaler is None:
        task_to_scaler = {}

    cluster = next((c for c in clusters if prop in c), None)
    if cluster is None:
        # IndexError is what the former `[...][0]` lookup raised.
        raise IndexError(f'property {prop!r} is not part of any cluster')

    num_molecules = len(X_all)
    if not 1 <= mol_id <= num_molecules:
        # Without this check mol_id <= 0 would silently wrap around and
        # return the prediction for a molecule counted from the end.
        raise IndexError(
            f'mol_id {mol_id} is out of range 1..{num_molecules}'
        )

    if model is None:
        model, task_to_scaler = load_hard_sharing_model(
            A=A_all, X=X_all, E=E_all, tasks=cluster, 
            task_to_scaler=task_to_scaler
        )
    i = mol_id - 1

    # convert shape for batch mode: prepend a batch axis of length 1
    x = [arr.reshape((1,) + tuple(arr.shape))
         for arr in (X_all[i], A_all[i], E_all[i])]

    cluster_prediction = model.predict(x)
    prediction = cluster_prediction[cluster.index(prop)]
    prediction = task_to_scaler[prop].inverse_transform(prediction)
    return prediction[0][0]

In [None]:
# Load the dataset once for the whole notebook.
# The guard only runs this when `__file__` is undefined (presumably: inside
# an interactive kernel, not when this code is run or imported as a .py file).
if __name__ == '__main__' and '__file__' not in globals():    
    # load_data() with no argument requests the entire dataset.
    A_all, X_all, E_all, y_all = load_data()
    # N: nodes per graph, F: node-feature size, S: edge-feature size.
    N, F, S = get_shape_params(A=A_all, X=X_all, E=E_all)
    # n_out = y_all.shape[-1]   # Dimension of the target

In [None]:
# Work on a random subset of 1000 molecules and transform its targets.
# NOTE(review): no random seed is set before sampling, so the subset
# presumably differs between runs -- confirm whether that is intended.
if __name__ == '__main__' and '__file__' not in globals(): 
    A, X, E, y = sample_from_data(1000, A_all, X_all, E_all, y_all)
    # standardize() rewrites `y` in place (it is a copy, so y_all keeps the
    # raw values) and returns the fitted per-task transformers.
    task_to_scaler = standardize(y)

In [None]:
# Task clusters: each inner list names the target columns that are trained
# together in one hard-sharing model (one model per inner list).
clusters = [['A', 'B', 'alpha'], 
            ['C', 'r2', 'u0'],
            ['zpve', 'g298', 'cv'],
            ['lumo', 'u298', 'h298'],
            ['mu', 'homo']]

In [None]:
# Hold out 10% of the sampled molecules for testing. All four collections are
# split in one call so their rows stay aligned.
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible across runs -- confirm whether that is intended.
if __name__ == '__main__' and '__file__' not in globals():     
    A_train, A_test, \
        X_train, X_test, \
        E_train, E_test, \
        y_train, y_test = train_test_split(A, X, E, y, test_size=0.1)

In [None]:
# Train one hard-sharing model per task cluster and save it to disk.
if __name__ == '__main__' and '__file__' not in globals():
    print('begin training models')
    for cluster in clusters:
        print(f'training {cluster}')
        model = build_hard_sharing_model(
            A=A_train, X=X_train, E=E_train, num_tasks=len(cluster)
        )
        # Split the (samples, tasks) target matrix into one single-column
        # array per task, matching the model's list of outputs.
        y_train_cluster = np.hsplit(y_train[cluster].values, len(cluster))
        model.fit(x=[X_train, A_train, E_train], 
                  y=y_train_cluster,
                  batch_size=32,
                  validation_split=0.1,
                  epochs=25)
        # Stores the weights plus the scaler of every task in the cluster.
        save_model(model, cluster, task_to_scaler)

In [None]:
# Evaluate every saved cluster model on the held-out molecules.
if __name__ == '__main__' and '__file__' not in globals():   
    for cluster in clusters:
        # Reload the model from disk; scalers already in task_to_scaler are
        # reused, missing ones are read from their pickle files.
        model, task_to_scaler = load_hard_sharing_model(
            A=A_test, X=X_test, E=E_test, tasks=cluster, 
            task_to_scaler=task_to_scaler
        )
        # One single-column target array per task, as in training.
        y_test_cluster = np.hsplit(y_test[cluster].values, len(cluster))
        # Loss on the transformed targets.
        model_loss = model.evaluate(x=[X_test, A_test, E_test],
                                    y=y_test_cluster)
        print(f"Test loss on {cluster}: {model_loss}")
        
        cluster_pred = model.predict([X_test, A_test, E_test])
        for prop, batch_pred in zip(cluster, cluster_pred):
            # Map predictions back to the property's original scale.
            batch_pred = task_to_scaler[prop].inverse_transform(batch_pred)
            errors = list()
            for index, pred in zip(y_test.index.values, batch_pred):
                # Ground truth comes from y_all (untransformed), looked up by
                # the index label that y_test kept from the sampled rows.
                actual = y_all.loc[index, prop]
                # Relative error.
                # NOTE(review): divides by `actual`; a target value of 0
                # would give inf/nan here -- confirm this cannot occur.
                err = abs((pred-actual)/actual)
                errors.append(err[0])
            print(f'Avg error of {prop} is {sum(errors)/len(errors):.2%}')