In [1]:
import matplotlib.pyplot as plt
import numpy as np
from joblib import dump, load
import pandas as pd
import tensorflow as tf 
import os
import molmap

from tensorflow.keras.models import load_model
from molmap.model.loss import cross_entropy
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Numerically stable: ``exp`` is only evaluated on non-positive values, so
    large-magnitude inputs saturate to 0 or 1 without overflow warnings.

    Parameters
    ----------
    x : scalar or array-like
        Raw (pre-activation) values.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Values in [0, 1] with the same shape as ``x``; floating input dtypes
        (e.g. float32) are preserved.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and is a no-op
    # (returns a view) for arrays with one or more dimensions.
    return out[()]



# Expose only GPU index 5 to CUDA. This is set in the first cell, before the
# model is loaded in the next one.
# NOTE(review): the device index is hard-coded for the original machine.
os.environ.update({"CUDA_VISIBLE_DEVICES": "5"})

# load model

In [2]:
model_name = 'BACE_MODEL_OPT.h5'
# Load without the saved training configuration; the model is compiled
# explicitly below with molmap's `cross_entropy` loss.
model = load_model(model_name, compile=False)
# `learning_rate` is the supported keyword (`lr` is a deprecated alias).
# The former `decay=0.0` argument is omitted: it equals the legacy default and
# the keyword is rejected by newer Keras optimizers.
opt = tf.keras.optimizers.Adam(
    learning_rate=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)
model.compile(optimizer=opt, loss=cross_entropy)

# load feature extractor

In [3]:
### optimized hyper-parameters
# NOTE(review): n_neighbors / min_dist are not referenced by any cell visible
# here; presumably they record the settings used to build the saved map -- confirm.
n_neighbors, min_dist = 15, 0.75

# MolMap object restored from disk; used below via `mp2.batch_transform(...)`
# to turn SMILES strings into model inputs.
mp2 = molmap.loadmap('../optimized_fingerprint.mp')

## Predict on the test and validation sets

In [4]:
def _predict_and_save(df, out_csv):
    """Featurize ``df.smiles``, predict probabilities and write them to ``out_csv``.

    Relies on the notebook-level ``mp2`` (feature extractor), ``model`` and
    ``sigmoid`` defined in earlier cells.

    Returns
    -------
    tuple
        ``(X, y_prob, dfres)``: the transformed features, the sigmoid of the
        model output, and the single-column ``MMNF_prob`` frame that was saved.
    """
    X = mp2.batch_transform(df.smiles)
    y_prob = sigmoid(model.predict(X))
    dfres = pd.DataFrame(y_prob, columns=['MMNF_prob'])
    # Default to_csv settings: the positional row index is written as the
    # first (unnamed) column.
    dfres.to_csv(out_csv)
    return X, y_prob, dfres


# Test split
df_test = pd.read_csv('../data/test.csv')
testX, y_pred_test, dfres_test = _predict_and_save(df_test, './test.predict_prob.csv')

# Validation split
df_val = pd.read_csv('../data/val.csv')
valX, y_pred_val, dfres_val = _predict_and_save(df_val, './val.predict_prob.csv')

100%|##########| 152/152 [00:05<00:00, 30.34it/s]
100%|##########| 151/151 [00:02<00:00, 53.52it/s]


## predict on drugs

In [5]:
# Read the drug table, featurize its SMILES column with the loaded MolMap
# object, and squash the model output through the sigmoid.
df_drug = pd.read_csv('../data/drugs.csv')
drugX = mp2.batch_transform(df_drug['smiles'])
drug_model_output = model.predict(drugX)
y_pred = sigmoid(drug_model_output)

100%|##########| 26/26 [00:00<00:00, 141.04it/s]


In [6]:
# Single-column frame of predicted probabilities; saved with the default
# to_csv settings (row index included), then previewed.
dfres = pd.DataFrame(data=y_pred, columns=['MMNF_prob'])
dfres.to_csv(path_or_buf='./drugs.predict_prob.csv')
dfres.head(n=5)

Unnamed: 0,MMNF_prob
0,0.772816
1,0.813703
2,0.804224
3,0.930097
4,0.830551


## Predict on the ChEMBL novel dataset

In [7]:
# Same pipeline as the other datasets: read CSV -> MolMap features ->
# model output -> sigmoid.
df_novel = pd.read_csv('../data/bace_chembl_novel.csv')
novelX = mp2.batch_transform(df_novel['smiles'])
novel_model_output = model.predict(novelX)
y_pred_novel = sigmoid(novel_model_output)

100%|##########| 395/395 [00:05<00:00, 69.14it/s]


In [8]:
# Store the probabilities for the novel set and preview the first rows.
dfres_novel = pd.DataFrame(data=y_pred_novel, columns=['MMNF_prob'])
dfres_novel.to_csv(path_or_buf='./chembl_novel.predict_prob.csv')
dfres_novel.head(n=5)

Unnamed: 0,MMNF_prob
0,0.037353
1,0.626146
2,0.793751
3,0.027771
4,0.777459


## Predict on the ChEMBL common dataset

In [9]:
# Featurize and score the common set.
df_common = pd.read_csv('../data/bace_chembl_common.csv')
commonX = mp2.batch_transform(df_common['smiles'])
common_model_output = model.predict(commonX)
y_pred_common = sigmoid(common_model_output)

# Persist the probabilities (row index included by default) and preview them.
dfres_common = pd.DataFrame(data=y_pred_common, columns=['MMNF_prob'])
dfres_common.to_csv(path_or_buf='./chembl_common.predict_prob.csv')
dfres_common.head(n=5)

100%|##########| 5325/5325 [01:47<00:00, 49.36it/s]


Unnamed: 0,MMNF_prob
0,0.05091
1,0.999788
2,0.999704
3,0.99959
4,0.257111
