## nn Approach



### Setup

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler


from pyMLaux import plot_history, evaluate_classification_result

In [13]:
base_dir = '/home/bac/activity_prediction/implementation/'
data_dir = base_dir + 'data/source/'
result_dir = base_dir + 'data/results/'

### load & prepare dataset

the following code needs to be adapted for each protein-ligand complex individually

In [14]:
nn_data_raw_mdi = pd.read_csv(data_dir+"ACHE/ache_mdi.csv")
nn_data_raw_per = pd.read_csv(data_dir+"ACHE/ache_per.csv")
nn_data_raw = pd.read_csv(data_dir+"ACHE/ache.csv")

In [15]:
lookup = {'inactive':0,'active':1}

nn_data_per = {'data': np.array(nn_data_raw_per.iloc[:, 1:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw_per.iloc[0:,-1]]),
             'feature_names': nn_data_raw_per.columns[1:-1],
             'target_names': ['inactive', 'active']}

nn_data_mdi = {'data': np.array(nn_data_raw_mdi.iloc[:, 1:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw_mdi.iloc[0:,-1]]),
             'feature_names': nn_data_raw_mdi.columns[1:-1],
             'target_names': ['inactive', 'active']}

nn_data_base = {'data': np.array(nn_data_raw.iloc[:, 2:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw.iloc[0:,-1]]),
             'feature_names': nn_data_raw.columns[2:-1],
             'target_names': ['inactive', 'active']}


split into train- and test-set

In [16]:
X_train_base, X_test_base, y_train_base, y_test_base = train_test_split(nn_data_base['data'], nn_data_base['target'],
                                                    test_size=0.3, random_state=4232)

X_train_mdi, X_test_mdi, y_train_mdi, y_test_mdi = train_test_split(nn_data_mdi['data'], nn_data_mdi['target'],
                                                    test_size=0.3, random_state=4232)

X_train_per, X_test_per, y_train_per, y_test_per = train_test_split(nn_data_per['data'], nn_data_per['target'],
                                                    test_size=0.3, random_state=4232)

### train and apply neural network

In [17]:
model_base = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_base['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_base.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_per = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_per['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_per.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_mdi = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_mdi['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_mdi.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])



In [18]:
history_base = model_base.fit(x=X_train_base, y=y_train_base, epochs=150, batch_size=16, validation_split=0.2, verbose=2)
history_per = model_per.fit(x=X_train_per, y=y_train_per, epochs=150, batch_size=16, validation_split=0.2, verbose=2)
history_mdi = model_mdi.fit(x=X_train_mdi, y=y_train_mdi, epochs=150, batch_size=16, validation_split=0.2, verbose=2)


Epoch 1/150


36/36 - 3s - loss: 0.7807 - accuracy: 0.4207 - val_loss: 0.7560 - val_accuracy: 0.4255 - 3s/epoch - 80ms/step
Epoch 2/150
36/36 - 1s - loss: 0.7254 - accuracy: 0.4706 - val_loss: 0.7238 - val_accuracy: 0.4823 - 903ms/epoch - 25ms/step
Epoch 3/150
36/36 - 1s - loss: 0.6897 - accuracy: 0.5330 - val_loss: 0.6998 - val_accuracy: 0.5106 - 800ms/epoch - 22ms/step
Epoch 4/150
36/36 - 1s - loss: 0.6607 - accuracy: 0.6096 - val_loss: 0.6796 - val_accuracy: 0.5745 - 744ms/epoch - 21ms/step
Epoch 5/150
36/36 - 1s - loss: 0.6362 - accuracy: 0.6738 - val_loss: 0.6624 - val_accuracy: 0.6170 - 750ms/epoch - 21ms/step
Epoch 6/150
36/36 - 1s - loss: 0.6163 - accuracy: 0.7041 - val_loss: 0.6486 - val_accuracy: 0.6525 - 855ms/epoch - 24ms/step
Epoch 7/150
36/36 - 1s - loss: 0.5996 - accuracy: 0.7184 - val_loss: 0.6361 - val_accuracy: 0.6596 - 809ms/epoch - 22ms/step
Epoch 8/150
36/36 - 1s - loss: 0.5830 - accuracy: 0.7469 - val_loss: 0.6218 - val_accuracy: 0.6667 - 771ms/epoch - 21ms/step
Epoch 9/150
36/

Evaluate Testdata using model

In [19]:

pred_base = model_base.predict(X_test_base)
pred_per = model_per.predict(X_test_per)
pred_mdi = model_mdi.predict(X_test_mdi)

print("---------Base-Prediction----------")
evaluate_classification_result(y_test_base,pred_base,classes=nn_data_base["target_names"])
print("---------Permutation-FeatureSelection-Prediction----------")
evaluate_classification_result(y_test_per,pred_per,classes=nn_data_per["target_names"])
print("---------MDI-FeatureSelection-Prediction----------")
evaluate_classification_result(y_test_mdi,pred_mdi,classes=nn_data_mdi["target_names"])


---------Base-Prediction----------
[[137   0]
 [164   0]]


Class inactive:
    Sensitivity (TPR): 100.000% (137 of 137)
    Specificity (TNR):   0.000% (0 of 164)
    Precision:          45.515% (137 of 301)
    Neg. pred. value:      nan% (0 of 0)
Class active:
    Sensitivity (TPR):   0.000% (0 of 164)
    Specificity (TNR): 100.000% (137 of 137)
    Precision:             nan% (0 of 0)
    Neg. pred. value:   45.515% (137 of 301)

Overall accuracy:   45.515% (137 of 301)
Balanced accuracy:  50.000%
---------Permutation-FeatureSelection-Prediction----------
[[137   0]
 [164   0]]


Class inactive:
    Sensitivity (TPR): 100.000% (137 of 137)
    Specificity (TNR):   0.000% (0 of 164)
    Precision:          45.515% (137 of 301)
    Neg. pred. value:      nan% (0 of 0)
Class active:
    Sensitivity (TPR):   0.000% (0 of 164)
    Specificity (TNR): 100.000% (137 of 137)
    Precision:             nan% (0 of 0)
    Neg. pred. value:   45.515% (137 of 301)

Overall accuracy:   45.515% (

  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)
  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)
  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)


array([[137,   0],
       [164,   0]])

In [22]:
pred_base = model_base.predict(X_test_base)
classes_base = [1 if i > 0.5  else 0 for i in pred_base]

pred_per = model_per.predict(X_test_per)
classes_per = [1 if i > 0.5  else 0 for i in pred_per]

pred_mdi = model_mdi.predict(X_test_mdi)
classes_mdi = [1 if i > 0.5  else 0 for i in pred_mdi]



In [23]:
result_df = pd.concat(
    [
        pd.DataFrame(columns=["INDEX"]),
        pd.DataFrame(columns=nn_data_raw_per.columns[1:-1]),
    ]
)

for i, row in enumerate(X_test_per):
    data = [i]
    data.extend(row)
    result_df.loc[len(result_df["INDEX"])] = data

result_df["LABEL"] = y_test_per
result_df["PRED"] = classes_per

result_df.to_csv(result_dir+"ACHE/fe_rf_per_nn.csv",encoding="utf-8")

In [24]:
result_df = pd.concat(
    [
        pd.DataFrame(columns=["INDEX"]),
        pd.DataFrame(columns=nn_data_raw_mdi.columns[1:-1]),
    ]
)

for i, row in enumerate(X_test_mdi):
    data = [i]
    data.extend(row)
    result_df.loc[len(result_df["INDEX"])] = data

result_df["LABEL"] = y_test_mdi
result_df["PRED"] = classes_mdi

result_df.to_csv(result_dir+"ACHE/fe_mdi_per_nn.csv",encoding="utf-8")