In [1]:
##################################################################################
##### Define all parameters for model tuning
##################################################################################

# Number of cross-validation folds (passed as `k` to build_kfold and used in the
# "<n_fold>fold" output directory name).
n_fold = 5
# Experiment name; together with outPath it forms the output directory for folds and models.
expName = "NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC"
outPath = "Results"
# File name of the pickle that stores the generated folds.
foldName = "folds.pickle"

# Fold-generation settings forwarded to build_kfold.
# NOTE(review): with shuffle = True and seed = None the fold assignment differs on
# every run, so the results below are not reproducible — consider a fixed integer seed.
shuffle = True
seed = None

# Location of the input feature table (folder + CSV file name are joined when loading).
input_data_folder = "Data_from_Asim"
file = "Protein_DPC[100, 0, 0, 0]-st-simplesequence.csv"

# Quantity watched by the ModelCheckpoint callback to decide which epoch's model to keep.
monitor = "val_loss"

In [2]:
import os 
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, confusion_matrix
from sklearn.metrics import roc_auc_score, matthews_corrcoef

import math

In [3]:
# for root, dirs, files in os.walk(input_data_folder):
#     for file in files:
#         print(file)

In [4]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Looping over the device list keeps the setting identical on every GPU and makes
# this cell a no-op (instead of an IndexError) on CPU-only machines.
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
##################################################################################
##### Build k-fold functions
##################################################################################

## Build the K-fold from dataset
def build_kfold(features, labels, k=10, shuffle=False, seed=None):
    """Split a dataset into `k` stratified train/test folds.

    Parameters
    ----------
    features, labels :
        Samples and their class labels. Both are indexed with the integer index
        arrays produced by the splitter (assumes numpy arrays — TODO confirm for
        other callers).
    k : int
        Number of folds.
    shuffle : bool
        Whether samples are shuffled before being split into folds.
    seed : int or None
        Random state used for shuffling. It is ignored when `shuffle` is False.

    Returns
    -------
    list of dict
        One dict per fold with the keys "X_train", "X_test", "y_train", "y_test".
    """
    # scikit-learn rejects a non-None random_state when shuffle is False, and the
    # seed has no effect in that case anyway, so only forward it when shuffling.
    skf = StratifiedKFold(n_splits=k, shuffle=shuffle,
                          random_state=seed if shuffle else None)

    return [
        {
            "X_train": features[train_index],
            "X_test": features[test_index],
            "y_train": labels[train_index],
            "y_test": labels[test_index],
        }
        for train_index, test_index in skf.split(features, labels)
    ]

In [6]:
##################################################################################
##### define evaluator functions
##################################################################################

def pred2label(y_pred):
    """Turn predicted scores into integer labels by rounding to the nearest integer.

    Rounding is done with `np.round`, so values exactly halfway between two
    integers go to the even one (e.g. 0.5 -> 0, 1.5 -> 2).
    """
    return np.round(y_pred).astype(int)

# Prepare Training and Independent data

In [7]:
input_data_file = os.path.join(input_data_folder, file)

# The CSV holds both partitions; the 'set' column tells them apart and the
# 'labels' column holds the class of each row. Every remaining column is a feature.
data = pd.read_csv(input_data_file, sep=',', header=0)

train_data = data[data['set'] == 'train'].drop('set', axis=1)
independent_data = data[data['set'] == 'test'].drop('set', axis=1)

# Labels are kept as column vectors of shape (n_samples, 1).
train_labels = np.array(train_data['labels']).reshape(-1, 1)
train_features = np.array(train_data.drop('labels', axis=1))

indpe_labels = np.array(independent_data['labels']).reshape(-1, 1)
indpe_features = np.array(independent_data.drop('labels', axis=1))

##################################################################################
##### Summarise the loaded dataset
##################################################################################

# The positive/negative counts below rely on the labels being 0/1.
print("\n======================================================================")
print("\nFile:", file)
print("Training Positive:", np.sum(train_labels))
print("Training Negative:", train_labels.shape[0] - np.sum(train_labels))
print("Independent Positive:", np.sum(indpe_labels))
print("Independent Negative:", indpe_labels.shape[0] - np.sum(indpe_labels))
print("Feature size:", train_features[0].shape)

##################################################################################
##### Generate Folds from dataset, and store to file
##################################################################################

## Generate the k-fold dataset
folds = build_kfold(train_features, train_labels, k=n_fold, shuffle=shuffle, seed=seed)

## Write the k-fold dataset to file
foldPath = os.path.join(outPath, expName, "{}fold".format(n_fold))
os.makedirs(foldPath, exist_ok=True)
# The context manager guarantees the pickle file is flushed and closed.
with open(os.path.join(foldPath, foldName), "wb") as fold_file:
    pickle.dump(folds, fold_file)



File: Protein_DPC[100, 0, 0, 0]-st-simplesequence.csv
Training Positive: 1191
Training Negative: 1191
Independent Positive: 203
Independent Negative: 1022
Feature size: (400,)


# Model

In [8]:
# Number of epochs and mini-batch size handed to model.fit for every trained model.
epochs = 100
batch_size = 16

##################################################################################
##### Function to customize the DLNN architecture with parameters
##################################################################################

def DLNN_Classifier(input_vec_shape,
                    dense_decode_units = 32, ## Dense layer parameters
                    prob = 0.5, learn_rate = 0.0005, loss = 'binary_crossentropy', metrics = 'accuracy',
                    beta = 0.001):
    """Build and compile a small fully-connected binary classifier.

    Architecture: Input -> Dense(dense_decode_units, relu) -> Dropout(prob)
    -> Dense(dense_decode_units / 2, relu) -> Dropout(prob) -> Dense(1, sigmoid).
    Every Dense kernel carries an L2 penalty with factor `beta`.

    Parameters
    ----------
    input_vec_shape : tuple
        Shape of one input sample, e.g. (400,).
    dense_decode_units : int
        Width of the first hidden layer; the second hidden layer has half as many units.
    prob : float
        Dropout rate applied after each hidden layer.
    learn_rate : float
        Learning rate of the Adam optimizer.
    loss :
        Loss passed to `model.compile`.
    metrics :
        Metrics passed to `model.compile`; None compiles the model without metrics.
    beta : float
        L2 regularization factor for all Dense kernels (previously hard-coded to 0.001).

    Returns
    -------
    tf.keras.Model
        The compiled model.
    """

    def l2_penalty():
        # A fresh regularizer instance per layer, as in the original code.
        return tf.keras.regularizers.l2(beta)

    input1 = tf.keras.layers.Input(shape=input_vec_shape)

    ######################################################################################################
    ########  Classifier  ################################################################################
    ######################################################################################################

    y = tf.keras.layers.Dense(dense_decode_units,
                              kernel_regularizer = l2_penalty(),
                              activation = 'relu'
                             )(input1)

    y = tf.keras.layers.Dropout(prob)(y)

    y = tf.keras.layers.Dense(int(dense_decode_units/2),
                              kernel_regularizer = l2_penalty(),
                              activation = 'relu'
                             )(y)

    y = tf.keras.layers.Dropout(prob)(y)

    y = tf.keras.layers.Dense(1,
                              kernel_regularizer = l2_penalty(),
                              activation = 'sigmoid')(y)

    ## Generate Model from input and output
    model = tf.keras.models.Model(inputs=input1, outputs=y)

    ## Compile model
    # `metrics=None` is compile's own default, so a single call covers both the
    # "with metrics" and "without metrics" cases.
    model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate),
                  loss = loss, metrics = metrics)

    return model

In [9]:
# Size the preview model from the loaded features rather than a hard-coded 400,
# so the summary always matches the models trained below.
DLNN_Classifier(train_features[0].shape).summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 400)]             0         
                                                                 
 dense (Dense)               (None, 32)                12832     
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 13,377
Trainable params: 13,377
Non-trainable p

# Training

In [10]:
##################################################################################
##### Train one model per fold and record Train/Test metrics for each of them
##################################################################################

def _evaluate_split(model, features, labels):
    """Score `model` on one data split.

    Returns a dict whose keys match the metric keys of `evaluations`
    (everything except "Fold" and "Train_Test").
    """
    y_pred = model.predict(features)
    label_pred = pred2label(y_pred)

    # labels=[0, 1] pins the confusion matrix to 2x2 so the unpacking below
    # cannot fail when a split or its predictions contain a single class.
    tn, fp, fn, tp = confusion_matrix(labels, label_pred, labels=[0, 1]).ravel()
    fpr, tpr, thresholds = roc_curve(labels, y_pred)

    return {
        "Accuracy": accuracy_score(labels, label_pred),
        "Precision": precision_score(labels, label_pred),
        "TPR": tpr,
        "FPR": fpr,
        "TPR_FPR_Thresholds": thresholds,
        # Stored under a dict key instead of a variable named `auc`, which would
        # shadow the `auc` function imported from sklearn.metrics.
        "AUC": roc_auc_score(labels, y_pred),
        "Sensitivity": tp / (tp + fn),
        "Specificity": tn / (tn + fp),
        "MCC": matthews_corrcoef(labels, label_pred),
    }


## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

## Create and set directory to save model
modelPath = os.path.join(outPath, expName, "{}fold".format(n_fold), "models")
os.makedirs(modelPath, exist_ok=True)

##################################################################################
##### TRAIN and PREDICT for every Fold, using models
##################################################################################

for i, fold in enumerate(folds):

    print("\nTrain/Test model on Fold #"+str(i)+".")

    # Random order of the training samples for this fold.
    randomized_index_arr = np.random.permutation(fold["X_train"].shape[0])

    input_size = fold["X_train"][0].shape
    ## Generate model using function
    model = DLNN_Classifier(input_vec_shape = input_size)

    model_file_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    ## Checkpoint callback: keeps only the epoch with the best `monitor` value on disk.
    ## There is no early stopping; all `epochs` epochs are always run.
    ## NOTE(review): the held-out fold drives checkpoint selection and is also the
    ## "Test" split scored below, so the Test metrics are optimistically biased.
    modelCallbacks = [
        tf.keras.callbacks.ModelCheckpoint(model_file_path,
                                           monitor = monitor, verbose = 1, save_best_only = True,
                                           save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
    ]
    model.fit(x = fold["X_train"][randomized_index_arr], y = fold["y_train"][randomized_index_arr],
              batch_size = batch_size, epochs = epochs, verbose = 1,
              callbacks = modelCallbacks, validation_data = (fold["X_test"], fold["y_test"]))

    # Evaluate the best checkpoint, not the weights of the last epoch.
    model = tf.keras.models.load_model(model_file_path)

    ##################################################################################
    ##### Prediction and metrics for the TRAIN and TEST parts of this fold
    ##################################################################################

    for split_name, split_features, split_labels in (
        ("Train", fold["X_train"], fold["y_train"]),
        ("Test", fold["X_test"], fold["y_test"]),
    ):
        evaluations["Fold"].append(i)
        evaluations["Train_Test"].append(split_name)
        for metric_name, metric_value in _evaluate_split(model, split_features, split_labels).items():
            evaluations[metric_name].append(metric_value)

    # Release the model and the Keras graph state before the next fold.
    del model
    tf.keras.backend.clear_session()


Train/Test model on Fold #0.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.73420, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 0.73420 to 0.71515, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 0.71515 to 0.70512, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 0.70512 to 0.69893, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 0.69893 to 0.69509, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 0.69509 to 0.68994, saving model to Results\NT_Sit

Epoch 23/100
Epoch 23: val_loss did not improve from 0.60923
Epoch 24/100
Epoch 24: val_loss did not improve from 0.60923
Epoch 25/100
Epoch 25: val_loss did not improve from 0.60923
Epoch 26/100
Epoch 26: val_loss did not improve from 0.60923
Epoch 27/100
Epoch 27: val_loss improved from 0.60923 to 0.60742, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold0.hdf5
Epoch 28/100
Epoch 28: val_loss did not improve from 0.60742
Epoch 29/100
Epoch 29: val_loss did not improve from 0.60742
Epoch 30/100
Epoch 30: val_loss did not improve from 0.60742
Epoch 31/100
Epoch 31: val_loss did not improve from 0.60742
Epoch 32/100
Epoch 32: val_loss did not improve from 0.60742
Epoch 33/100
Epoch 33: val_loss did not improve from 0.60742
Epoch 34/100
Epoch 34: val_loss did not improve from 0.60742
Epoch 35/100
Epoch 35: val_loss did not improve from 0.60742
Epoch 36/100
Epoch 36: val_loss did not improve from 0.60742
Epoch 37/100
Epoch 37: val_lo

Epoch 52/100
Epoch 52: val_loss did not improve from 0.60742
Epoch 53/100
Epoch 53: val_loss did not improve from 0.60742
Epoch 54/100
Epoch 54: val_loss did not improve from 0.60742
Epoch 55/100
Epoch 55: val_loss did not improve from 0.60742
Epoch 56/100
Epoch 56: val_loss did not improve from 0.60742
Epoch 57/100
Epoch 57: val_loss did not improve from 0.60742
Epoch 58/100
Epoch 58: val_loss did not improve from 0.60742
Epoch 59/100
Epoch 59: val_loss did not improve from 0.60742
Epoch 60/100
Epoch 60: val_loss did not improve from 0.60742
Epoch 61/100
Epoch 61: val_loss did not improve from 0.60742
Epoch 62/100
Epoch 62: val_loss did not improve from 0.60742
Epoch 63/100
Epoch 63: val_loss did not improve from 0.60742
Epoch 64/100
Epoch 64: val_loss did not improve from 0.60742
Epoch 65/100
Epoch 65: val_loss did not improve from 0.60742
Epoch 66/100
Epoch 66: val_loss did not improve from 0.60742
Epoch 67/100
Epoch 67: val_loss did not improve from 0.60742
Epoch 68/100
Epoch 68: v

Epoch 82/100
Epoch 82: val_loss did not improve from 0.60742
Epoch 83/100
Epoch 83: val_loss did not improve from 0.60742
Epoch 84/100
Epoch 84: val_loss did not improve from 0.60742
Epoch 85/100
Epoch 85: val_loss did not improve from 0.60742
Epoch 86/100
Epoch 86: val_loss did not improve from 0.60742
Epoch 87/100
Epoch 87: val_loss did not improve from 0.60742
Epoch 88/100
Epoch 88: val_loss did not improve from 0.60742
Epoch 89/100
Epoch 89: val_loss did not improve from 0.60742
Epoch 90/100
Epoch 90: val_loss did not improve from 0.60742
Epoch 91/100
Epoch 91: val_loss did not improve from 0.60742
Epoch 92/100
Epoch 92: val_loss did not improve from 0.60742
Epoch 93/100
Epoch 93: val_loss did not improve from 0.60742
Epoch 94/100
Epoch 94: val_loss did not improve from 0.60742
Epoch 95/100
Epoch 95: val_loss did not improve from 0.60742
Epoch 96/100
Epoch 96: val_loss did not improve from 0.60742
Epoch 97/100
Epoch 97: val_loss did not improve from 0.60742
Epoch 98/100
Epoch 98: v

Epoch 9/100
Epoch 9: val_loss improved from 0.69042 to 0.68815, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 10/100
Epoch 10: val_loss improved from 0.68815 to 0.68510, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 11/100
Epoch 11: val_loss improved from 0.68510 to 0.68148, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 12/100
Epoch 12: val_loss improved from 0.68148 to 0.67547, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 13/100
Epoch 13: val_loss improved from 0.67547 to 0.66974, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 14/100
Epoch 14: val_loss improved from 0.66974 to 0.66216, saving model to Results\NT_Site_PredNTS_Classi

Epoch 30/100
Epoch 30: val_loss improved from 0.59856 to 0.59719, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 31/100
Epoch 31: val_loss improved from 0.59719 to 0.59537, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold1.hdf5
Epoch 32/100
Epoch 32: val_loss did not improve from 0.59537
Epoch 33/100
Epoch 33: val_loss did not improve from 0.59537
Epoch 34/100
Epoch 34: val_loss did not improve from 0.59537
Epoch 35/100
Epoch 35: val_loss did not improve from 0.59537
Epoch 36/100
Epoch 36: val_loss did not improve from 0.59537
Epoch 37/100
Epoch 37: val_loss did not improve from 0.59537
Epoch 38/100
Epoch 38: val_loss did not improve from 0.59537
Epoch 39/100
Epoch 39: val_loss did not improve from 0.59537
Epoch 40/100
Epoch 40: val_loss did not improve from 0.59537
Epoch 41/100
Epoch 41: val_loss did not improve from 0.59537
Epoch 42/100
Epoch 42: val_loss di

Epoch 88/100
Epoch 88: val_loss did not improve from 0.59537
Epoch 89/100
Epoch 89: val_loss did not improve from 0.59537
Epoch 90/100
Epoch 90: val_loss did not improve from 0.59537
Epoch 91/100
Epoch 91: val_loss did not improve from 0.59537
Epoch 92/100
Epoch 92: val_loss did not improve from 0.59537
Epoch 93/100
Epoch 93: val_loss did not improve from 0.59537
Epoch 94/100
Epoch 94: val_loss did not improve from 0.59537
Epoch 95/100
Epoch 95: val_loss did not improve from 0.59537
Epoch 96/100
Epoch 96: val_loss did not improve from 0.59537
Epoch 97/100
Epoch 97: val_loss did not improve from 0.59537
Epoch 98/100
Epoch 98: val_loss did not improve from 0.59537
Epoch 99/100
Epoch 99: val_loss did not improve from 0.59537
Epoch 100/100
Epoch 100: val_loss did not improve from 0.59537

Train/Test model on Fold #2.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.73125, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold2.hdf5
Epo

Epoch 13/100
Epoch 13: val_loss improved from 0.66115 to 0.65377, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold2.hdf5
Epoch 14/100
Epoch 14: val_loss improved from 0.65377 to 0.65243, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold2.hdf5
Epoch 15/100
Epoch 15: val_loss improved from 0.65243 to 0.64977, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold2.hdf5
Epoch 16/100
Epoch 16: val_loss did not improve from 0.64977
Epoch 17/100
Epoch 17: val_loss did not improve from 0.64977
Epoch 18/100
Epoch 18: val_loss did not improve from 0.64977
Epoch 19/100
Epoch 19: val_loss improved from 0.64977 to 0.64769, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold2.hdf5
Epoch 20/100
Epoch 20: val_loss did not improve from 0.64769
Epoch 21/100
Epoch 21: val_loss did not impr

Epoch 41/100
Epoch 41: val_loss did not improve from 0.64769
Epoch 42/100
Epoch 42: val_loss did not improve from 0.64769
Epoch 43/100
Epoch 43: val_loss did not improve from 0.64769
Epoch 44/100
Epoch 44: val_loss did not improve from 0.64769
Epoch 45/100
Epoch 45: val_loss did not improve from 0.64769
Epoch 46/100
Epoch 46: val_loss did not improve from 0.64769
Epoch 47/100
Epoch 47: val_loss did not improve from 0.64769
Epoch 48/100
Epoch 48: val_loss did not improve from 0.64769
Epoch 49/100
Epoch 49: val_loss did not improve from 0.64769
Epoch 50/100
Epoch 50: val_loss did not improve from 0.64769
Epoch 51/100
Epoch 51: val_loss did not improve from 0.64769
Epoch 52/100
Epoch 52: val_loss did not improve from 0.64769
Epoch 53/100
Epoch 53: val_loss did not improve from 0.64769
Epoch 54/100
Epoch 54: val_loss did not improve from 0.64769
Epoch 55/100
Epoch 55: val_loss did not improve from 0.64769
Epoch 56/100
Epoch 56: val_loss did not improve from 0.64769
Epoch 57/100
Epoch 57: v

Epoch 100/100
Epoch 100: val_loss did not improve from 0.64769

Train/Test model on Fold #3.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.73395, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 0.73395 to 0.71401, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 0.71401 to 0.70416, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 0.70416 to 0.69850, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 0.69850 to 0.69408, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 6/100
Epoch 6: val_loss i

Epoch 22/100
Epoch 22: val_loss did not improve from 0.61200
Epoch 23/100
Epoch 23: val_loss improved from 0.61200 to 0.61080, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 24/100
Epoch 24: val_loss improved from 0.61080 to 0.60985, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold3.hdf5
Epoch 25/100
Epoch 25: val_loss did not improve from 0.60985
Epoch 26/100
Epoch 26: val_loss did not improve from 0.60985
Epoch 27/100
Epoch 27: val_loss did not improve from 0.60985
Epoch 28/100
Epoch 28: val_loss did not improve from 0.60985
Epoch 29/100
Epoch 29: val_loss did not improve from 0.60985
Epoch 30/100
Epoch 30: val_loss did not improve from 0.60985
Epoch 31/100
Epoch 31: val_loss did not improve from 0.60985
Epoch 32/100
Epoch 32: val_loss did not improve from 0.60985
Epoch 33/100
Epoch 33: val_loss did not improve from 0.60985
Epoch 34/100
Epoch 34: val_loss di

Epoch 80/100
Epoch 80: val_loss did not improve from 0.60985
Epoch 81/100
Epoch 81: val_loss did not improve from 0.60985
Epoch 82/100
Epoch 82: val_loss did not improve from 0.60985
Epoch 83/100
Epoch 83: val_loss did not improve from 0.60985
Epoch 84/100
Epoch 84: val_loss did not improve from 0.60985
Epoch 85/100
Epoch 85: val_loss did not improve from 0.60985
Epoch 86/100
Epoch 86: val_loss did not improve from 0.60985
Epoch 87/100
Epoch 87: val_loss did not improve from 0.60985
Epoch 88/100
Epoch 88: val_loss did not improve from 0.60985
Epoch 89/100
Epoch 89: val_loss did not improve from 0.60985
Epoch 90/100
Epoch 90: val_loss did not improve from 0.60985
Epoch 91/100
Epoch 91: val_loss did not improve from 0.60985
Epoch 92/100
Epoch 92: val_loss did not improve from 0.60985
Epoch 93/100
Epoch 93: val_loss did not improve from 0.60985
Epoch 94/100
Epoch 94: val_loss did not improve from 0.60985
Epoch 95/100
Epoch 95: val_loss did not improve from 0.60985
Epoch 96/100
Epoch 96: v

Epoch 7/100
Epoch 7: val_loss improved from 0.68977 to 0.68466, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold4.hdf5
Epoch 8/100
Epoch 8: val_loss improved from 0.68466 to 0.67934, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold4.hdf5
Epoch 9/100
Epoch 9: val_loss improved from 0.67934 to 0.67270, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold4.hdf5
Epoch 10/100
Epoch 10: val_loss improved from 0.67270 to 0.66509, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold4.hdf5
Epoch 11/100
Epoch 11: val_loss improved from 0.66509 to 0.65677, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-fold4.hdf5
Epoch 12/100
Epoch 12: val_loss improved from 0.65677 to 0.64992, saving model to Results\NT_Site_PredNTS_Classifica

Epoch 31/100
Epoch 31: val_loss did not improve from 0.62014
Epoch 32/100
Epoch 32: val_loss did not improve from 0.62014
Epoch 33/100
Epoch 33: val_loss did not improve from 0.62014
Epoch 34/100
Epoch 34: val_loss did not improve from 0.62014
Epoch 35/100
Epoch 35: val_loss did not improve from 0.62014
Epoch 36/100
Epoch 36: val_loss did not improve from 0.62014
Epoch 37/100
Epoch 37: val_loss did not improve from 0.62014
Epoch 38/100
Epoch 38: val_loss did not improve from 0.62014
Epoch 39/100
Epoch 39: val_loss did not improve from 0.62014
Epoch 40/100
Epoch 40: val_loss did not improve from 0.62014
Epoch 41/100
Epoch 41: val_loss did not improve from 0.62014
Epoch 42/100
Epoch 42: val_loss did not improve from 0.62014
Epoch 43/100
Epoch 43: val_loss did not improve from 0.62014
Epoch 44/100
Epoch 44: val_loss did not improve from 0.62014
Epoch 45/100
Epoch 45: val_loss did not improve from 0.62014
Epoch 46/100
Epoch 46: val_loss did not improve from 0.62014
Epoch 47/100
Epoch 47: v

Epoch 61/100
Epoch 61: val_loss did not improve from 0.62014
Epoch 62/100
Epoch 62: val_loss did not improve from 0.62014
Epoch 63/100
Epoch 63: val_loss did not improve from 0.62014
Epoch 64/100
Epoch 64: val_loss did not improve from 0.62014
Epoch 65/100
Epoch 65: val_loss did not improve from 0.62014
Epoch 66/100
Epoch 66: val_loss did not improve from 0.62014
Epoch 67/100
Epoch 67: val_loss did not improve from 0.62014
Epoch 68/100
Epoch 68: val_loss did not improve from 0.62014
Epoch 69/100
Epoch 69: val_loss did not improve from 0.62014
Epoch 70/100
Epoch 70: val_loss did not improve from 0.62014
Epoch 71/100
Epoch 71: val_loss did not improve from 0.62014
Epoch 72/100
Epoch 72: val_loss did not improve from 0.62014
Epoch 73/100
Epoch 73: val_loss did not improve from 0.62014
Epoch 74/100
Epoch 74: val_loss did not improve from 0.62014
Epoch 75/100
Epoch 75: val_loss did not improve from 0.62014
Epoch 76/100
Epoch 76: val_loss did not improve from 0.62014
Epoch 77/100
Epoch 77: v

Epoch 91/100
Epoch 91: val_loss did not improve from 0.62014
Epoch 92/100
Epoch 92: val_loss did not improve from 0.62014
Epoch 93/100
Epoch 93: val_loss did not improve from 0.62014
Epoch 94/100
Epoch 94: val_loss did not improve from 0.62014
Epoch 95/100
Epoch 95: val_loss did not improve from 0.62014
Epoch 96/100
Epoch 96: val_loss did not improve from 0.62014
Epoch 97/100
Epoch 97: val_loss did not improve from 0.62014
Epoch 98/100
Epoch 98: val_loss did not improve from 0.62014
Epoch 99/100
Epoch 99: val_loss did not improve from 0.62014
Epoch 100/100
Epoch 100: val_loss did not improve from 0.62014


## k-fold Training evaluation

In [11]:
evaluations_df = pd.DataFrame.from_dict(evaluations)

# Select the scalar metrics BEFORE averaging: the TPR/FPR/threshold columns hold
# arrays, and taking the group mean over them fails on pandas >= 2.0.
metric_columns = ['Accuracy',
                  'Precision',
                  'AUC',
                  'Sensitivity',
                  'Specificity',
                  'MCC']
evaluations_df_grouped = evaluations_df.groupby(["Train_Test"])[metric_columns].mean()

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,0.738433,0.743514,0.811382,0.728772,0.748064,0.477078
Train,0.811293,0.818572,0.886081,0.800583,0.821999,0.623148


In [13]:
# Per-fold metrics: one row per (fold, Train/Test) pair, including the ROC arrays.
evaluations_df

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,0,Train,0.807874,0.826281,"[0.0, 0.0010504201680672268, 0.093487394957983...","[0.0, 0.0, 0.0, 0.001049317943336831, 0.001049...","[1.9559121, 0.95591205, 0.8529623, 0.8524899, ...",0.888194,0.779412,0.836306,0.616728
1,0,Test,0.756813,0.755187,"[0.0, 0.0041841004184100415, 0.096234309623430...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9356874, 0.9356874, 0.8471316, 0.8462839, 0...",0.822756,0.761506,0.752101,0.513634
2,1,Train,0.811024,0.820541,"[0.0, 0.001049317943336831, 0.0566631689401888...","[0.0, 0.0, 0.0, 0.0010504201680672268, 0.00105...","[1.8946393, 0.89463925, 0.80224484, 0.802165, ...",0.883003,0.796432,0.82563,0.622322
3,1,Test,0.773585,0.790179,"[0.0, 0.004201680672268907, 0.0504201680672268...","[0.0, 0.0, 0.0, 0.0041841004184100415, 0.00418...","[1.871835, 0.871835, 0.81341875, 0.8111316, 0....",0.838842,0.743697,0.803347,0.548057
4,2,Train,0.811647,0.800607,"[0.0, 0.001049317943336831, 0.0545645330535152...","[0.0, 0.0, 0.0, 0.001049317943336831, 0.001049...","[1.8734345, 0.8734345, 0.81518096, 0.8150997, ...",0.885843,0.83001,0.793284,0.623716
5,2,Test,0.695378,0.697872,"[0.0, 0.004201680672268907, 0.0714285714285714...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.8938842, 0.8938842, 0.80379283, 0.8028052, ...",0.771715,0.689076,0.701681,0.390787
6,3,Train,0.816894,0.837054,"[0.0, 0.001049317943336831, 0.0818467995802728...","[0.0, 0.0, 0.0, 0.001049317943336831, 0.001049...","[1.9226935, 0.9226935, 0.842219, 0.8418051, 0....",0.88746,0.786988,0.8468,0.634925
7,3,Test,0.737395,0.740426,"[0.0, 0.004201680672268907, 0.0588235294117647...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9086914, 0.90869147, 0.8426451, 0.8413046, ...",0.816238,0.731092,0.743697,0.474828
8,4,Train,0.809024,0.808377,"[0.0, 0.001049317943336831, 0.0902413431269674...","[0.0, 0.0, 0.0, 0.001049317943336831, 0.001049...","[1.926738, 0.92673796, 0.82737124, 0.8263836, ...",0.885907,0.810073,0.807975,0.61805
9,4,Test,0.728992,0.733906,"[0.0, 0.004201680672268907, 0.0714285714285714...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9062726, 0.9062727, 0.84050953, 0.83981305,...",0.807358,0.718487,0.739496,0.458084


# Independent data

In [14]:
## create the evaluation data structure for all iterations
## (this rebinds `evaluations`, so the per-fold results collected earlier are no
## longer reachable through that name)
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Independent Data performance
##################################################################################

print("\nIndependent evaluation for model.")

# adding random shuffling of the dataset for training purpose
randomized_index_arr = np.arange(train_features.shape[0])
randomized_index_arr = np.random.permutation(randomized_index_arr)

input_size = train_features[0].shape
## Generate model using function
model = DLNN_Classifier(input_vec_shape = input_size)

model_file_path = os.path.join(modelPath, "bestModel-full.hdf5")
## Define the checkpoint callback that saves the best model, then train the model.
## There is no early-stopping callback here: all `epochs` epochs are run.
## NOTE(review): validation_data below is the independent set, so the checkpoint
## is selected on the same data it is later evaluated on. This leaks the
## independent set into model selection — confirm whether that is intended.
modelCallbacks = [
    tf.keras.callbacks.ModelCheckpoint(model_file_path,
                                       monitor = monitor, verbose = 1, save_best_only = True, 
                                       save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
]
model.fit(x = train_features[randomized_index_arr], y = train_labels[randomized_index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
          callbacks = modelCallbacks, validation_data = (indpe_features, indpe_labels))

# Reload the best checkpoint written by the callback (not the last-epoch weights).
model = tf.keras.models.load_model(model_file_path)

##################################################################################
##### Prediction and metrics for Train dataset
##################################################################################

y_pred = model.predict(train_features)
label_pred = pred2label(y_pred)

# Compute accuracy, precision, MCC, sensitivity (recall) and specificity
acc = accuracy_score(train_labels, label_pred)
prec = precision_score(train_labels,label_pred)
mcc = matthews_corrcoef(train_labels, label_pred)

# The 4-way unpacking assumes a 2x2 confusion matrix, i.e. both classes are present.
conf = confusion_matrix(train_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(train_labels, y_pred)
# NOTE(review): this name shadows the `auc` function imported from sklearn.metrics.
auc = roc_auc_score(train_labels, y_pred)

evaluations["Train_Test"].append("Train")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

y_pred = model.predict(indpe_features)
label_pred = pred2label(y_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
auc = roc_auc_score(indpe_labels, y_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

del model
tf.keras.backend.clear_session()


Independent evaluation for model.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.72576, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-full.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 0.72576 to 0.70497, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-full.hdf5
Epoch 3/100
Epoch 3: val_loss did not improve from 0.70497
Epoch 4/100
Epoch 4: val_loss improved from 0.70497 to 0.69510, saving model to Results\NT_Site_PredNTS_Classification_DLNN_AsimEmbedding_DPC\5fold\models\bestModel-full.hdf5
Epoch 5/100
Epoch 5: val_loss did not improve from 0.69510
Epoch 6/100
Epoch 6: val_loss did not improve from 0.69510
Epoch 7/100
Epoch 7: val_loss did not improve from 0.69510
Epoch 8/100
Epoch 8: val_loss did not improve from 0.69510
Epoch 9/100
Epoch 9: val_loss did not improve from 0.69510
Epoch 10/100
Epoch 10: val_loss did not improve from 0.69510
Epoch 11/100
Epoch 11: val_lo

Epoch 59/100
Epoch 59: val_loss did not improve from 0.69510
Epoch 60/100
Epoch 60: val_loss did not improve from 0.69510
Epoch 61/100
Epoch 61: val_loss did not improve from 0.69510
Epoch 62/100
Epoch 62: val_loss did not improve from 0.69510
Epoch 63/100
Epoch 63: val_loss did not improve from 0.69510
Epoch 64/100
Epoch 64: val_loss did not improve from 0.69510
Epoch 65/100
Epoch 65: val_loss did not improve from 0.69510
Epoch 66/100
Epoch 66: val_loss did not improve from 0.69510
Epoch 67/100
Epoch 67: val_loss did not improve from 0.69510
Epoch 68/100
Epoch 68: val_loss did not improve from 0.69510
Epoch 69/100
Epoch 69: val_loss did not improve from 0.69510
Epoch 70/100
Epoch 70: val_loss did not improve from 0.69510
Epoch 71/100
Epoch 71: val_loss did not improve from 0.69510
Epoch 72/100
Epoch 72: val_loss did not improve from 0.69510
Epoch 73/100
Epoch 73: val_loss did not improve from 0.69510
Epoch 74/100
Epoch 74: val_loss did not improve from 0.69510
Epoch 75/100
Epoch 75: v

Epoch 89/100
Epoch 89: val_loss did not improve from 0.69510
Epoch 90/100
Epoch 90: val_loss did not improve from 0.69510
Epoch 91/100
Epoch 91: val_loss did not improve from 0.69510
Epoch 92/100
Epoch 92: val_loss did not improve from 0.69510
Epoch 93/100
Epoch 93: val_loss did not improve from 0.69510
Epoch 94/100
Epoch 94: val_loss did not improve from 0.69510
Epoch 95/100
Epoch 95: val_loss did not improve from 0.69510
Epoch 96/100
Epoch 96: val_loss did not improve from 0.69510
Epoch 97/100
Epoch 97: val_loss did not improve from 0.69510
Epoch 98/100
Epoch 98: val_loss did not improve from 0.69510
Epoch 99/100
Epoch 99: val_loss did not improve from 0.69510
Epoch 100/100
Epoch 100: val_loss did not improve from 0.69510


In [15]:
## Tabulate the collected metrics: one column per key of `evaluations`,
## one row per evaluated split. The bare name on the last line displays the frame.
evaluations_df = pd.DataFrame(evaluations)
evaluations_df

Unnamed: 0,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,Train,0.70487,0.851585,"[0.0, 0.0008396305625524769, 0.040302267002518...","[0.0, 0.0, 0.0, 0.0008396305625524769, 0.00083...","[1.5140159, 0.514016, 0.5087028, 0.50868607, 0...",0.823675,0.496222,0.913518,0.450873
1,Independent,0.720816,0.264407,"[0.0, 0.0049261083743842365, 0.009852216748768...","[0.0, 0.0, 0.0, 0.0019569471624266144, 0.00195...","[1.5135704, 0.5135704, 0.51356655, 0.51249707,...",0.636355,0.384236,0.787671,0.149491
