## nn Approach



### Setup

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

import os
from pathlib import Path

from pyMLaux import plot_history, evaluate_classification_result

In [4]:
# read dynamic path
base_dir = Path(os.getcwd()) / "implementation"
data_dir = base_dir / "data/source/"
result_dir = base_dir / "data/results/"

### load & prepare dataset

the following code needs to be adapted for each protein-ligand complex individually

In [5]:
nn_data_raw_mdi = pd.read_csv(data_dir/"ACHE/ache_mdi.csv")
nn_data_raw_per = pd.read_csv(data_dir/"ACHE/ache_per.csv")
nn_data_raw = pd.read_csv(data_dir/"ACHE/ache.csv")

In [6]:
lookup = {'inactive':0,'active':1}

nn_data_per = {'data': np.array(nn_data_raw_per.iloc[:, 1:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw_per.iloc[0:,-1]]),
             'feature_names': nn_data_raw_per.columns[1:-1],
             'target_names': ['inactive', 'active']}

nn_data_mdi = {'data': np.array(nn_data_raw_mdi.iloc[:, 1:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw_mdi.iloc[0:,-1]]),
             'feature_names': nn_data_raw_mdi.columns[1:-1],
             'target_names': ['inactive', 'active']}

nn_data_base = {'data': np.array(nn_data_raw.iloc[:, 2:-1]),
             'target': np.array([lookup[y] for y in nn_data_raw.iloc[0:,-1]]),
             'feature_names': nn_data_raw.columns[2:-1],
             'target_names': ['inactive', 'active']}


split into train- and test-set

In [7]:
X_train_base, X_test_base, y_train_base, y_test_base = train_test_split(nn_data_base['data'], nn_data_base['target'],
                                                    test_size=0.3, random_state=4232)

X_train_mdi, X_test_mdi, y_train_mdi, y_test_mdi = train_test_split(nn_data_mdi['data'], nn_data_mdi['target'],
                                                    test_size=0.3, random_state=4232)

X_train_per, X_test_per, y_train_per, y_test_per = train_test_split(nn_data_per['data'], nn_data_per['target'],
                                                    test_size=0.3, random_state=4232)

### train and apply neural network

In [8]:
model_base = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_base['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_base.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_per = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_per['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_per.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_mdi = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(nn_data_mdi['data'].shape[1], )),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_mdi.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])



2024-04-29 18:59:22.794202: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 18:59:23.360474: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 18:59:23.360719: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 18:59:23.382468: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 18:59:23.382686: I external/local_xla/xla/stream_executor

In [9]:
history_base = model_base.fit(x=X_train_base, y=y_train_base, epochs=150, batch_size=16, validation_split=0.2, verbose=2)
history_per = model_per.fit(x=X_train_per, y=y_train_per, epochs=150, batch_size=16, validation_split=0.2, verbose=2)
history_mdi = model_mdi.fit(x=X_train_mdi, y=y_train_mdi, epochs=150, batch_size=16, validation_split=0.2, verbose=2)


Epoch 1/150


2024-04-29 18:59:32.118736: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-04-29 18:59:35.036670: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fe7d07afca0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-04-29 18:59:35.036738: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2024-04-29 18:59:35.197656: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-04-29 18:59:35.454626: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
I0000 00:00:1714409975.954500   17391 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


36/36 - 10s - loss: 0.7267 - accuracy: 0.5098 - val_loss: 0.7212 - val_accuracy: 0.5177 - 10s/epoch - 275ms/step
Epoch 2/150
36/36 - 1s - loss: 0.6805 - accuracy: 0.5811 - val_loss: 0.6889 - val_accuracy: 0.5532 - 1s/epoch - 34ms/step
Epoch 3/150
36/36 - 1s - loss: 0.6488 - accuracy: 0.6221 - val_loss: 0.6670 - val_accuracy: 0.5745 - 1s/epoch - 35ms/step
Epoch 4/150
36/36 - 1s - loss: 0.6240 - accuracy: 0.6720 - val_loss: 0.6498 - val_accuracy: 0.5816 - 1s/epoch - 36ms/step
Epoch 5/150
36/36 - 1s - loss: 0.6039 - accuracy: 0.6952 - val_loss: 0.6351 - val_accuracy: 0.6241 - 1s/epoch - 35ms/step
Epoch 6/150
36/36 - 1s - loss: 0.5852 - accuracy: 0.7148 - val_loss: 0.6213 - val_accuracy: 0.6667 - 1s/epoch - 39ms/step
Epoch 7/150
36/36 - 1s - loss: 0.5685 - accuracy: 0.7540 - val_loss: 0.6100 - val_accuracy: 0.7021 - 1s/epoch - 33ms/step
Epoch 8/150
36/36 - 1s - loss: 0.5543 - accuracy: 0.7629 - val_loss: 0.5996 - val_accuracy: 0.7234 - 1s/epoch - 34ms/step
Epoch 9/150
36/36 - 1s - loss: 0.

Evaluate Testdata using model

In [10]:

pred_base = model_base.predict(X_test_base)
pred_per = model_per.predict(X_test_per)
pred_mdi = model_mdi.predict(X_test_mdi)

print("---------Base-Prediction----------")
evaluate_classification_result(y_test_base,pred_base,classes=nn_data_base["target_names"])
print("---------Permutation-FeatureSelection-Prediction----------")
evaluate_classification_result(y_test_per,pred_per,classes=nn_data_per["target_names"])
print("---------MDI-FeatureSelection-Prediction----------")
evaluate_classification_result(y_test_mdi,pred_mdi,classes=nn_data_mdi["target_names"])




---------Base-Prediction----------
[[137   0]
 [164   0]]


Class inactive:
    Sensitivity (TPR): 100.000% (137 of 137)
    Specificity (TNR):   0.000% (0 of 164)
    Precision:          45.515% (137 of 301)
    Neg. pred. value:      nan% (0 of 0)
Class active:
    Sensitivity (TPR):   0.000% (0 of 164)
    Specificity (TNR): 100.000% (137 of 137)
    Precision:             nan% (0 of 0)
    Neg. pred. value:   45.515% (137 of 301)

Overall accuracy:   45.515% (137 of 301)
Balanced accuracy:  50.000%
---------Permutation-FeatureSelection-Prediction----------
[[137   0]
 [164   0]]


Class inactive:
    Sensitivity (TPR): 100.000% (137 of 137)
    Specificity (TNR):   0.000% (0 of 164)
    Precision:          45.515% (137 of 301)
    Neg. pred. value:      nan% (0 of 0)
Class active:
    Sensitivity (TPR):   0.000% (0 of 164)
    Specificity (TNR): 100.000% (137 of 137)
    Precision:             nan% (0 of 0)
    Neg. pred. value:   45.515% (137 of 301)

Overall accuracy:   45.515% (

  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)
  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)
  print('    Neg. pred. value:  %7.3f%% (%d of %d)'%(100. * tn / (tn + fn) , tn, tn + fn))
  prec = tp / (tp + fp)


array([[137,   0],
       [164,   0]])

In [11]:
pred_base = model_base.predict(X_test_base)
classes_base = [1 if i > 0.5  else 0 for i in pred_base]

pred_per = model_per.predict(X_test_per)
classes_per = [1 if i > 0.5  else 0 for i in pred_per]

pred_mdi = model_mdi.predict(X_test_mdi)
classes_mdi = [1 if i > 0.5  else 0 for i in pred_mdi]

 1/10 [==>...........................] - ETA: 0s



In [12]:
result_df = pd.concat(
    [
        pd.DataFrame(columns=["INDEX"]),
        pd.DataFrame(columns=nn_data_raw_per.columns[1:-1]),
    ]
)

for i, row in enumerate(X_test_per):
    data = [i]
    data.extend(row)
    result_df.loc[len(result_df["INDEX"])] = data

result_df["LABEL"] = y_test_per
result_df["PRED"] = classes_per

result_df.to_csv(result_dir/"ACHE/fe_rf_per_nn.csv",encoding="utf-8")

In [13]:
result_df = pd.concat(
    [
        pd.DataFrame(columns=["INDEX"]),
        pd.DataFrame(columns=nn_data_raw_mdi.columns[1:-1]),
    ]
)

for i, row in enumerate(X_test_mdi):
    data = [i]
    data.extend(row)
    result_df.loc[len(result_df["INDEX"])] = data

result_df["LABEL"] = y_test_mdi
result_df["PRED"] = classes_mdi

result_df.to_csv(result_dir / "ACHE/fe_rf_mdi_nn.csv", encoding="utf-8")