In [1]:
from net import MolMapNet
from cbks import Reg_EarlyStoppingAndPerformance
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from molmaps import load_map
from joblib import dump, load
import os

# Restrict this process to a single GPU; the value is the device index
# exposed to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

# Seed both NumPy's global RNG and TensorFlow's graph-level RNG with the
# same value so repeated runs start from the same random state.
SEED = 123
np.random.seed(SEED)
tf.compat.v1.set_random_seed(SEED)

In [2]:
def MinMaxScaleClip(df):
    """Min-max scale ``df`` with the stored scale config and clip to [0, 1].

    The bounds come from ``load_config(metric='scale')`` and are read from
    its ``'min'`` and ``'max'`` entries. A small epsilon (1e-8) is added to
    the range so a zero-width range does not divide by zero.

    Parameters
    ----------
    df : object supporting arithmetic with the config entries and ``.clip``
        (called with a pandas DataFrame in this notebook).

    Returns
    -------
    The scaled values, with everything outside [0, 1] clipped to the bounds.
    """
    from molmaps.config import load_config

    bounds = load_config(metric='scale')
    lower = bounds['min']
    value_range = (bounds['max'] - lower) + 1e-8
    scaled = (df - lower) / value_range
    return scaled.clip(0, 1)

In [3]:
# Locations of the joblib caches for the feature array (X) and targets (Y).
X_name, Y_name = './tmpignore/X.data', './tmpignore/Y.data'

In [4]:
# Build the feature array X and the target array Y once and cache both with
# joblib; later runs simply read the cached files. Both cache files must be
# present to take the fast path, so a lone X cache triggers a rebuild.
if not (os.path.exists(X_name) and os.path.exists(Y_name)):
    mp = load_map('./cosine_tsne.mp')

    dfx = pd.read_csv('./molecule_open_data/candidate_train.csv')
    dfx = dfx.set_index('id')
    dfy = pd.read_csv('./molecule_open_data/train_answer.csv')
    dfy = dfy.set_index('id')

    # Left-join the answers onto the candidates by id, so the joined rows
    # follow the row order of dfx (the frame that is turned into X below).
    df = dfx.join(dfy)
    if len(df) != len(dfx):
        # Duplicate ids in the answer file would multiply rows and silently
        # misalign X and Y.
        raise ValueError(
            'joining train_answer.csv onto candidate_train.csv changed the '
            'row count from %d to %d; check for duplicate ids'
            % (len(dfx), len(df))
        )

    # Targets aligned row-for-row with dfx. Ids missing from the answer file
    # yield NaN rows.
    # NOTE(review): assumes every column of train_answer.csv is a target —
    # confirm against the data description.
    Y = df[dfy.columns].values

    df_scale = MinMaxScaleClip(dfx)
    X = mp.batch_transform(df_scale.values, scale = False, n_jobs = 20)

    # Make sure the cache directories exist before writing into them.
    for cache_path in (X_name, Y_name):
        cache_dir = os.path.dirname(cache_path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
    dump(X, X_name)
    dump(Y, Y_name)
else:
    X = load(X_name)
    Y = load(Y_name)

In [5]:
# Load the precomputed train/validation split; the stored object must unpack
# into exactly two items (training indices, validation indices).
# NOTE(review): joblib's load unpickles the file, so only use split files
# from a trusted source.
train_idx, valid_idx = load('./train_valid_idx.ind')

In [6]:
# --- Input geometry ---------------------------------------------------------
# Sizes of the second and third axes of X.
shape = (X.shape[1], X.shape[2])

# --- Optimisation -----------------------------------------------------------
lr = 1e-4            # learning rate handed to the Adam optimizer
batch_size = 128     # mini-batch size handed to model.fit
epochs = 500         # number of training epochs
loss = 'mse'         # loss name handed to model.compile

# --- Early-stopping / performance callback ----------------------------------
patience = 50            # forwarded as `patience`
criteria = 'val_loss'    # forwarded as `criteria`
n_conformers = 1         # forwarded as `n_conformers`

# --- Network head (forwarded to MolMapNet) ----------------------------------
dense_layers = [128]
dense_avf = 'relu'
last_avf = 'linear'

In [7]:
# Slice features and targets with the same index sets so each pair stays
# aligned row-for-row.
trainX, trainY = X[train_idx], Y[train_idx]
validX, validY = X[valid_idx], Y[valid_idx]

In [8]:
# Sanity check: display the shapes of the full feature and target arrays and
# of the training features.
X.shape, Y.shape, trainX.shape

((79267, 56, 56, 1), (79267, 6), (69883, 56, 56, 1))

In [9]:
# Instantiate the network: the input shape is a single sample of trainX
# (leading axis dropped) and the output width is the last axis of Y.
model = MolMapNet(
    input_shape=trainX.shape[1:],
    n_outputs=Y.shape[-1],
    conv1_kernel_size=13,
    dense_layers=dense_layers,
    dense_avf=dense_avf,
    last_avf=last_avf,
)

In [10]:
# Display the parameter count reported by the model's count_params().
model.count_params()

302726

In [11]:
# Callback from `cbks`; it receives the training and validation (X, Y) pairs
# together with the patience and the monitored criterion.
train_pair = (trainX, trainY)
valid_pair = (validX, validY)
performance = Reg_EarlyStoppingAndPerformance(
    train_pair,
    valid_pair,
    n_conformers=n_conformers,
    patience=patience,
    criteria=criteria,
)

# Adam optimizer configured from the hyperparameter cell. `learning_rate` is
# the current keyword name (`lr` is a deprecated alias). No `decay` argument
# is passed: zero decay is the default, and newer Keras optimizer
# implementations are understood to reject that keyword.
opt = tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(optimizer = opt, loss = loss)

In [None]:
# Train the model. The epoch count comes from the `epochs` hyperparameter
# rather than a repeated literal, so changing it in one place is enough.
# The `performance` callback is attached to the run; verbose=0 turns off
# Keras' own progress output.
model.fit(trainX, trainY,
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          shuffle=True,
          validation_data=(validX, validY),
          callbacks=[performance])