In [None]:
%run QM9GNN2_Multitask.ipynb

In [None]:
def random_hyperparameter_search(trials=50):
    """Train and evaluate randomly configured multitask models.

    The full data set is loaded once. Each trial then draws a random
    configuration (batch size, learning rate, epoch count, layer sizes,
    target scaler, loss function, soft vs. hard parameter sharing and the
    sharing strength), samples ``num_sampled`` examples, holds out 10% of
    them for testing, fits a model on the rest, records the test loss and
    the per-example predictions in a ``ModelData`` object and serializes it
    to ``model_data/hyperparam_search<timestamp>.pkl``.

    The helpers used here (``load_data``, ``sample_from_data``,
    ``standardize``, ``build_soft_sharing_model``,
    ``build_hard_sharing_model``, ``ModelData``, ``train_test_split``, the
    loss classes and ``optimizer``) are not defined in this cell; they are
    expected to be present in the notebook namespace.

    Args:
        trials: Number of random configurations to train and evaluate.

    Returns:
        None. Each trial writes its results to its own file.
    """
    # Function-scope stdlib imports: this cell has no import block of its own.
    import os
    from datetime import datetime

    # Settings shared by every trial. `mode` has to be bound before
    # load_data() is called, because that call already needs it.
    mode = 'batch'
    conv = 'ecc'
    num_sampled = 30000
    cluster = ['r2', 'cv']  # target columns learned jointly, one task each
    dirname = 'model_data'
    hidden_sizes = [16, 32, 64, 128, 256, 512]

    A_all, X_all, E_all, y_all = load_data(amount=None, mode=mode)

    for _ in range(trials):
        # ---- draw one random configuration ----
        batch_size = random.choice([16, 32, 64])
        learning_rate = random.choice([1e-2, 1e-3, 1e-4])
        epochs = random.choice([20, 30, 40])
        # Three freely sized layers followed by a last layer of at least 64.
        layer_sizes = [random.choice(hidden_sizes),
                       random.choice(hidden_sizes),
                       random.choice(hidden_sizes),
                       random.choice([64, 128, 256, 512])]
        scaler = random.choice(['power_transformer', 'standard_scaler'])
        loss_fn = random.choice([MeanAbsoluteError(), MeanSquaredError()])
        soft_sharing = random.random() < 0.5
        share_param = random.choice([1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3])

        # ---- sample and split the data ----
        # Use num_sampled (not a second literal) so the value stored in
        # `params` always matches the amount of data actually drawn.
        A, X, E, y = sample_from_data(num_sampled,
                                      A_all,
                                      X_all,
                                      E_all,
                                      y_all,
                                      mode=mode)
        # Returns a per-task mapping of fitted scalers; presumably it also
        # rescales `y` in place -- TODO confirm against standardize().
        task_to_scaler = standardize(y, scaler=scaler)

        A_train, A_test, \
        X_train, X_test, \
        E_train, E_test, \
        y_train, y_test = train_test_split(A, X, E, y, test_size=0.1)

        # ---- build the model ----
        # Both builders take the same arguments; only the soft-sharing
        # variant additionally takes the sharing strength.
        build_kwargs = dict(A=A_train,
                            X=X_train,
                            E=E_train,
                            num_tasks=len(cluster),
                            mode=mode,
                            conv=conv,
                            layer_sizes=layer_sizes)
        if soft_sharing:
            model, _ = build_soft_sharing_model(share_param=share_param,
                                                **build_kwargs)
        else:
            model, _ = build_hard_sharing_model(**build_kwargs)

        # Capture the textual model summary so it can be stored with the run.
        stream = io.StringIO()
        model.summary(print_fn=lambda x: stream.write(x + '\n'))
        summary = stream.getvalue()

        # NOTE(review): `optimizer` is taken from the surrounding namespace,
        # so the sampled `learning_rate` is recorded below but is not applied
        # to training in this function -- confirm how the optimizer is built.
        params = {'mode': mode,
                  'conv': conv,
                  'batch_size': batch_size,
                  'epochs': epochs,
                  'num_sampled': num_sampled,
                  'learning_rate': learning_rate,
                  'cluster': cluster,
                  'hard_sharing': not soft_sharing,
                  'share_param': share_param,
                  'model_summary': summary,
                  'loss_fn': type(loss_fn).__name__,
                  'optimizer': type(optimizer).__name__}
        model_data = ModelData(params=params)

        # ---- train ----
        # One single-column target array per task in `cluster`.
        y_train_cluster = np.hsplit(y_train[cluster].values, len(cluster))
        model.compile(optimizer=optimizer,
                      loss=loss_fn)
        model.fit(x=[X_train, A_train, E_train],
                  y=y_train_cluster,
                  batch_size=batch_size,
                  validation_split=0.1,
                  epochs=epochs,
                  callbacks=[model_data.loss_logger])

        # ---- evaluate ----
        y_test_cluster = np.hsplit(y_test[cluster].values, len(cluster))
        model_loss = model.evaluate(x=[X_test, A_test, E_test],
                                    y=y_test_cluster)
        model_data.test_loss = model_loss
        cluster_pred = model.predict([X_test, A_test, E_test])

        for prop, batch_pred in zip(cluster, cluster_pred):
            # Map predictions back through the task's scaler and pair them
            # with the reference values looked up in the full target table.
            batch_pred = task_to_scaler[prop].inverse_transform(batch_pred)
            for index, pred in zip(y_test.index.values, batch_pred):
                actual = y_all.loc[index, prop]
                model_data.add_test(prop, actual, pred[0])

        # ---- persist ----
        # A plain function has no `self`; stamp the file with the current
        # time so every trial gets its own output file.
        os.makedirs(dirname, exist_ok=True)
        dt_string = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
        filename = os.path.join(dirname,
                                'hyperparam_search' + dt_string + '.pkl')
        model_data.serialize(filename=filename)

In [None]:
# Start the search only when this cell is executed as the top-level program
# in a namespace where `__file__` is not defined.
if __name__ == '__main__':
    if '__file__' not in globals():
        random_hyperparameter_search(trials=50)