In [1]:
##################################################################################
##### Define all parameters for model tuning
##################################################################################

n_fold = 5
expName = "NT_Site_PredNTS_Classification_DLNN_Kmer"
outPath = "Results"
foldName = "folds.pickle"

shuffle = True
seed = None

input_data_folder = "PredNTS_MathFeature_ENC"

# monitor = 'val_loss'

In [2]:
train_data_filename = 'Training-datasets-PredNTS_kmer.csv'
indpe_data_filename = 'independent-dataset-PredNTS_kmer.csv'

In [3]:
import os 
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, confusion_matrix
from sklearn.metrics import roc_auc_score, classification_report, matthews_corrcoef

import math

In [4]:
# print(tf.test.is_gpu_available(cuda_only=True))
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
# ##################################################################################
# ##### define all CUSTOM functions
# ##################################################################################

# def one_hot_encode_nt(sequence, char_dict):
    
#     seq_encoded = np.zeros((len(sequence),len(char_dict)))
    
#     i = 0
#     for single_character in sequence:
#         if(single_character.upper() in char_dict.keys()):
#             seq_encoded[i][char_dict[single_character.upper()]] = 1
#             i = i+1
#         else:
#             raise ValueError('Incorrect character in NT sequence: '+sequence)
#     return seq_encoded

In [6]:
##################################################################################
##### Build k-fold functions
##################################################################################

## Build the K-fold from dataset
def build_kfold(features, labels, k=10, shuffle=False, seed=None):
    
    skf = StratifiedKFold(n_splits=k, shuffle=shuffle, random_state=seed)
    kfoldList = []
    for train_index, test_index in skf.split(features, labels):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        kfoldList.append({
            "X_train": X_train,
            "X_test": X_test,
            "y_train":y_train,
            "y_test":y_test
        })
    return kfoldList

In [7]:
##################################################################################
##### define evaluator functions
##################################################################################

def pred2label(y_pred):
    y_pred = np.round(y_pred)
    return y_pred

In [8]:
# monitor = 'val_loss'

# epochs = 100
# batch_size = 32

# ##################################################################################
# ##### Function to customize the DLNN architecture with parameters
# ##################################################################################

# def DLNN_Classifier(input_vec_shape,
#                     dense_decode_units = 2048, ## Dense layer parameters,
#                     dense_layers = 5,
#                     prob = 0.5, learn_rate = 0.0005, loss = 'binary_crossentropy', metrics = 'accuracy'):
    
#     beta = 0.001
    
#     input1 = tf.keras.layers.Input(shape=input_vec_shape)
    
#     ######################################################################################################
#     ########  Classifier  ################################################################################
#     ######################################################################################################
    
#     y = tf.keras.layers.Dense(dense_decode_units, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta))(input1)
#     y = tf.keras.layers.Dropout(prob)(y)
    
#     for i in range(1,dense_layers+1):
    
#         y = tf.keras.layers.Dense(int(dense_decode_units/(2**i)), 
#                                   kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                                  )(y)

#         y = tf.keras.layers.Dropout(prob)(y) 
    
#     y = tf.keras.layers.Dense(1, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                               activation = 'sigmoid')(y)

#     ## Generate Model from input and output
#     model = tf.keras.models.Model(inputs=input1, outputs=y)
    
#     ## Compile model
#     if(metrics != None):
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss, metrics = metrics)
#     else:
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss)

#     return model

In [9]:
monitor = 'val_accuracy'

epochs = 100
batch_size = 32

##################################################################################
##### Function to customize the DLNN architecture with parameters
##################################################################################

def DLNN_Classifier(input_vec_shape,
                    dense_decode_units = 2048, ## Dense layer parameters,
                    dense_layers = 5,
                    prob = 0.5, learn_rate = 0.0005, 
                    gn = 0.01,
                    loss = 'binary_crossentropy', metrics = 'accuracy'):
    
    beta = 0.001
    
    input1 = tf.keras.layers.Input(shape=input_vec_shape)
    
    ######################################################################################################
    ########  Classifier  ################################################################################
    ######################################################################################################
    
    y = tf.keras.layers.Dense(dense_decode_units, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta))(input1)
    y = tf.keras.layers.GaussianNoise(stddev=gn)(y)
    y = tf.keras.layers.Dropout(prob)(y)
    
    for i in range(1,dense_layers+1):
    
        y = tf.keras.layers.Dense(int(dense_decode_units/(2**i)), 
                                  kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                 )(y)
        y = tf.keras.layers.GaussianNoise(stddev=gn)(y)
        y = tf.keras.layers.Dropout(prob)(y) 
    
    y = tf.keras.layers.Dense(1, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta), 
                              activation = 'sigmoid')(y)

    ## Generate Model from input and output
    model = tf.keras.models.Model(inputs=input1, outputs=y)
    
    ## Compile model
    if(metrics != None):
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
                      loss = loss, metrics = metrics)
    else:
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
                      loss = loss)

    return model

In [10]:
# epochs = 100
# batch_size = 32

# ##################################################################################
# ##### Function to customize the DLNN architecture with parameters
# ##################################################################################

# def DLNN_Classifier(input_vec_shape,
#                     dense_decode_units = 8, ## Dense layer parameters,
#                     dense_layers = 2,
#                     prob = 0.5, learn_rate = 0.0005, loss = 'binary_crossentropy', metrics = 'accuracy'):
    
#     beta = 0.001
    
#     input1 = tf.keras.layers.Input(shape=input_vec_shape)
    
#     ######################################################################################################
#     ########  Classifier  ################################################################################
#     ######################################################################################################
    
#     y = tf.keras.layers.Dense(dense_decode_units, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta))(input1)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.Dropout(prob)(y)
    
#     for i in range(1,dense_layers+1):
    
#         y = tf.keras.layers.Dense(int(dense_decode_units/(2**i)), 
#                                   kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                                  )(y)
#         y = tf.keras.layers.BatchNormalization()(y)
#         y = tf.keras.layers.Dropout(prob)(y) 
    
#     y = tf.keras.layers.Dense(1, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                               activation = 'sigmoid')(y)

#     ## Generate Model from input and output
#     model = tf.keras.models.Model(inputs=input1, outputs=y)
    
#     ## Compile model
#     if(metrics != None):
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss, metrics = metrics)
#     else:
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss)

#     return model

In [11]:
# epochs = 200
# batch_size = 32

# ##################################################################################
# ##### Function to customize the DLNN architecture with parameters
# ##################################################################################

# def DLNN_Classifier(input_vec_shape,
#                     dense_decode_units = 16, ## Dense layer parameters,
#                     dense_layers = 3,
#                     prob = 0.5, learn_rate = 0.0001, loss = 'binary_crossentropy', metrics = 'accuracy'):
    
#     beta = 0.001
    
#     input1 = tf.keras.layers.Input(shape=input_vec_shape)
    
#     ######################################################################################################
#     ########  Classifier  ################################################################################
#     ######################################################################################################
    
#     y = tf.keras.layers.Dense(dense_decode_units, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta))(input1)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.Dropout(prob)(y)
    
#     for i in range(1,dense_layers+1):
    
#         y = tf.keras.layers.Dense(int(dense_decode_units/(2**i)), 
#                                   kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                                  )(y)
#         y = tf.keras.layers.BatchNormalization()(y)
#         y = tf.keras.layers.Dropout(prob)(y) 
    
#     y = tf.keras.layers.Dense(1, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                               activation = 'sigmoid')(y)

#     ## Generate Model from input and output
#     model = tf.keras.models.Model(inputs=input1, outputs=y)
    
#     ## Compile model
#     if(metrics != None):
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss, metrics = metrics)
#     else:
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss)

#     return model

In [12]:
# epochs = 200
# batch_size = 16

# ##################################################################################
# ##### Function to customize the DLNN architecture with parameters
# ##################################################################################

# def DLNN_Classifier(input_vec_shape,
#                     dense_decode_units = 8, ## Dense layer parameters,
#                     dense_layers = 2,
#                     prob_input = 0.9, prob = 0.5,
#                     learn_rate = 0.0001, loss = 'binary_crossentropy', metrics = 'accuracy'):
    
#     beta = 0.001
    
#     input1 = tf.keras.layers.Input(shape=input_vec_shape)
    
#     ######################################################################################################
#     ########  Classifier  ################################################################################
#     ######################################################################################################
    
#     y = tf.keras.layers.Dropout(prob_input)(input1)
    
#     y = tf.keras.layers.Dense(dense_decode_units, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta))(y)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.GaussianNoise(stddev=0.1)(y)
#     y = tf.keras.layers.Dropout(prob)(y)
    
#     for i in range(1,dense_layers+1):
    
#         y = tf.keras.layers.Dense(int(dense_decode_units/(2**i)), 
#                                   kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                                  )(y)
#         y = tf.keras.layers.BatchNormalization()(y)
#         y = tf.keras.layers.GaussianNoise(stddev=0.1)(y)
#         y = tf.keras.layers.Dropout(prob)(y) 
    
#     y = tf.keras.layers.Dense(1, 
#                               kernel_regularizer = tf.keras.regularizers.l2(beta), 
#                               activation = 'sigmoid')(y)

#     ## Generate Model from input and output
#     model = tf.keras.models.Model(inputs=input1, outputs=y)
    
#     ## Compile model
#     if(metrics != None):
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss, metrics = metrics)
#     else:
#         model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
#                       loss = loss)

#     return model

In [13]:
# for step in range(10):
#     initial_learning_rate=1e-1
#     decay_steps=10000
#     decay_rate=0.9
#     print(step, ':', initial_learning_rate * decay_rate ** (step / decay_steps))

In [14]:
DLNN_Classifier((8420)).summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8420)]            0         
                                                                 
 dense (Dense)               (None, 2048)              17246208  
                                                                 
 gaussian_noise (GaussianNoi  (None, 2048)             0         
 se)                                                             
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense_1 (Dense)             (None, 1024)              2098176   
                                                                 
 gaussian_noise_1 (GaussianN  (None, 1024)             0         
 oise)                                                       

# Train data preparation

In [15]:
##################################################################################
##### Read CSV data
##################################################################################

train_data_filepath = os.path.join(input_data_folder, train_data_filename)
train_data = pd.read_csv(train_data_filepath, sep=',', header=0)
train_data = train_data.drop('label', axis=1)

train_data['label'] = pd.Series([int(val.split('_')[-2])
                                 for val in train_data['nameseq']])

train_data = train_data.drop('nameseq', axis=1)

##################################################################################
##### Extract features and labels, create folds
##################################################################################

train_features = np.array(train_data.drop('label', axis=1))
train_labels = np.array(train_data['label'])
train_labels = train_labels.reshape((train_labels.shape[0], 1))

input_vec_shape = train_features[0].shape

folds = build_kfold(train_features, train_labels, k=n_fold, shuffle=shuffle, seed=seed)

## Write the k-fold dataset to file
foldPath = os.path.join(outPath, expName, "{}fold".format(n_fold))
if(not os.path.isdir(foldPath)):
    os.makedirs(foldPath)
pickle.dump(folds, open(os.path.join(foldPath, foldName), "wb"))

# Training

In [16]:
##################################################################################
##### For each input file, train model and generate different outputs in a structured folder
##################################################################################

## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Train/Test model on all folds, generate evaluations
##################################################################################

## Create and set directory to save model
modelPath = os.path.join(outPath, expName, "{}fold".format(n_fold), "models")
if(not os.path.isdir(modelPath)):
    os.makedirs(modelPath)

i = -1
for fold in folds:
    i += 1
    
    print("\nTrain/Test model on Fold #"+str(i)+".")
    
    model = DLNN_Classifier(input_vec_shape = input_vec_shape)
    
    ## Define the model callbacks for early stopping and saving the model. Then train model
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    modelCallbacks = [
        tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                           monitor = monitor, verbose = 1, save_best_only = True, 
                                           save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
    ]
    
    # adding random shuffling of the dataset for training purpose
    index_arr = np.arange(fold["X_train"].shape[0])
    index_arr = np.random.permutation(index_arr)
    
    model.fit(x = fold["X_train"][index_arr], y = fold["y_train"][index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
              callbacks = modelCallbacks, validation_data = (fold["X_test"], fold["y_test"]))
    
    model = tf.keras.models.load_model(current_model_path)
    
    ##################################################################################
    ##### Prediction and metrics for TRAIN dataset
    ##################################################################################

    y_pred = model.predict(fold["X_train"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_train"], label_pred)
    prec = precision_score(fold["y_train"],label_pred)
    mcc = matthews_corrcoef(fold["y_train"], label_pred)

    conf = confusion_matrix(fold["y_train"], label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(fold["y_train"], y_pred)
    auc = roc_auc_score(fold["y_train"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Train")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)
    
    ##################################################################################
    ##### Prediction and metrics for TEST dataset
    ##################################################################################

    y_pred = model.predict(fold["X_test"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_test"], label_pred)
    prec = precision_score(fold["y_test"],label_pred)
    mcc = matthews_corrcoef(fold["y_test"], label_pred)

    conf = confusion_matrix(fold["y_test"], label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(fold["y_test"], y_pred)
    auc = roc_auc_score(fold["y_test"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Test")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)
    
    del model
    tf.keras.backend.clear_session()


Train/Test model on Fold #0.
Epoch 1/200
Epoch 1: val_accuracy improved from -inf to 0.76520, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold0.hdf5
Epoch 2/200
Epoch 2: val_accuracy improved from 0.76520 to 0.77568, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold0.hdf5
Epoch 3/200
Epoch 3: val_accuracy did not improve from 0.77568
Epoch 4/200
Epoch 4: val_accuracy improved from 0.77568 to 0.77778, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold0.hdf5
Epoch 5/200
Epoch 5: val_accuracy did not improve from 0.77778
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.77778
Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.77778
Epoch 8/200
Epoch 8: val_accuracy did not improve from 0.77778
Epoch 9/200
Epoch 9: val_accuracy did not improve from 0.77778
Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.77778
Epoch 11/200
Epoch 11: val_ac

Epoch 29/200
Epoch 29: val_accuracy did not improve from 0.77987
Epoch 30/200
Epoch 30: val_accuracy did not improve from 0.77987
Epoch 31/200
Epoch 31: val_accuracy did not improve from 0.77987
Epoch 32/200
Epoch 32: val_accuracy did not improve from 0.77987
Epoch 33/200
Epoch 33: val_accuracy did not improve from 0.77987
Epoch 34/200
Epoch 34: val_accuracy did not improve from 0.77987
Epoch 35/200
Epoch 35: val_accuracy did not improve from 0.77987
Epoch 36/200
Epoch 36: val_accuracy did not improve from 0.77987
Epoch 37/200
Epoch 37: val_accuracy did not improve from 0.77987
Epoch 38/200
Epoch 38: val_accuracy did not improve from 0.77987
Epoch 39/200
Epoch 39: val_accuracy did not improve from 0.77987
Epoch 40/200
Epoch 40: val_accuracy did not improve from 0.77987
Epoch 41/200
Epoch 41: val_accuracy did not improve from 0.77987
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.77987
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.77987
Epoch 44/200
Epoch 44: va

Epoch 58: val_accuracy did not improve from 0.78616
Epoch 59/200
Epoch 59: val_accuracy did not improve from 0.78616
Epoch 60/200
Epoch 60: val_accuracy did not improve from 0.78616
Epoch 61/200
Epoch 61: val_accuracy did not improve from 0.78616
Epoch 62/200
Epoch 62: val_accuracy did not improve from 0.78616
Epoch 63/200
Epoch 63: val_accuracy did not improve from 0.78616
Epoch 64/200
Epoch 64: val_accuracy did not improve from 0.78616
Epoch 65/200
Epoch 65: val_accuracy did not improve from 0.78616
Epoch 66/200
Epoch 66: val_accuracy did not improve from 0.78616
Epoch 67/200
Epoch 67: val_accuracy did not improve from 0.78616
Epoch 68/200
Epoch 68: val_accuracy did not improve from 0.78616
Epoch 69/200
Epoch 69: val_accuracy did not improve from 0.78616
Epoch 70/200
Epoch 70: val_accuracy did not improve from 0.78616
Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.78616
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.78616
Epoch 73/200
Epoch 73: val_accuracy di

Epoch 87: val_accuracy did not improve from 0.79036
Epoch 88/200
Epoch 88: val_accuracy did not improve from 0.79036
Epoch 89/200
Epoch 89: val_accuracy did not improve from 0.79036
Epoch 90/200
Epoch 90: val_accuracy did not improve from 0.79036
Epoch 91/200
Epoch 91: val_accuracy did not improve from 0.79036
Epoch 92/200
Epoch 92: val_accuracy did not improve from 0.79036
Epoch 93/200
Epoch 93: val_accuracy did not improve from 0.79036
Epoch 94/200
Epoch 94: val_accuracy did not improve from 0.79036
Epoch 95/200
Epoch 95: val_accuracy did not improve from 0.79036
Epoch 96/200
Epoch 96: val_accuracy did not improve from 0.79036
Epoch 97/200
Epoch 97: val_accuracy did not improve from 0.79036
Epoch 98/200
Epoch 98: val_accuracy did not improve from 0.79036
Epoch 99/200
Epoch 99: val_accuracy did not improve from 0.79036
Epoch 100/200
Epoch 100: val_accuracy did not improve from 0.79036
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.79036
Epoch 102/200
Epoch 102: val_accur

Epoch 117/200
Epoch 117: val_accuracy did not improve from 0.79036
Epoch 118/200
Epoch 118: val_accuracy did not improve from 0.79036
Epoch 119/200
Epoch 119: val_accuracy did not improve from 0.79036
Epoch 120/200
Epoch 120: val_accuracy did not improve from 0.79036
Epoch 121/200
Epoch 121: val_accuracy did not improve from 0.79036
Epoch 122/200
Epoch 122: val_accuracy did not improve from 0.79036
Epoch 123/200
Epoch 123: val_accuracy did not improve from 0.79036
Epoch 124/200
Epoch 124: val_accuracy did not improve from 0.79036
Epoch 125/200
Epoch 125: val_accuracy did not improve from 0.79036
Epoch 126/200
Epoch 126: val_accuracy did not improve from 0.79036
Epoch 127/200
Epoch 127: val_accuracy did not improve from 0.79036
Epoch 128/200
Epoch 128: val_accuracy did not improve from 0.79036
Epoch 129/200
Epoch 129: val_accuracy did not improve from 0.79036
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.79036
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.7

Epoch 146/200
Epoch 146: val_accuracy did not improve from 0.79036
Epoch 147/200
Epoch 147: val_accuracy did not improve from 0.79036
Epoch 148/200
Epoch 148: val_accuracy did not improve from 0.79036
Epoch 149/200
Epoch 149: val_accuracy did not improve from 0.79036
Epoch 150/200
Epoch 150: val_accuracy did not improve from 0.79036
Epoch 151/200
Epoch 151: val_accuracy did not improve from 0.79036
Epoch 152/200
Epoch 152: val_accuracy did not improve from 0.79036
Epoch 153/200
Epoch 153: val_accuracy did not improve from 0.79036
Epoch 154/200
Epoch 154: val_accuracy did not improve from 0.79036
Epoch 155/200
Epoch 155: val_accuracy did not improve from 0.79036
Epoch 156/200
Epoch 156: val_accuracy did not improve from 0.79036
Epoch 157/200
Epoch 157: val_accuracy did not improve from 0.79036
Epoch 158/200
Epoch 158: val_accuracy did not improve from 0.79036
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.79036
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.7

Epoch 175: val_accuracy did not improve from 0.79036
Epoch 176/200
Epoch 176: val_accuracy did not improve from 0.79036
Epoch 177/200
Epoch 177: val_accuracy did not improve from 0.79036
Epoch 178/200
Epoch 178: val_accuracy did not improve from 0.79036
Epoch 179/200
Epoch 179: val_accuracy did not improve from 0.79036
Epoch 180/200
Epoch 180: val_accuracy did not improve from 0.79036
Epoch 181/200
Epoch 181: val_accuracy did not improve from 0.79036
Epoch 182/200
Epoch 182: val_accuracy did not improve from 0.79036
Epoch 183/200
Epoch 183: val_accuracy did not improve from 0.79036
Epoch 184/200
Epoch 184: val_accuracy did not improve from 0.79036
Epoch 185/200
Epoch 185: val_accuracy did not improve from 0.79036
Epoch 186/200
Epoch 186: val_accuracy did not improve from 0.79036
Epoch 187/200
Epoch 187: val_accuracy did not improve from 0.79036
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.79036
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.79036
Epoch 190

Epoch 4/200
Epoch 4: val_accuracy did not improve from 0.79455
Epoch 5/200
Epoch 5: val_accuracy did not improve from 0.79455
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.79455
Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.79455
Epoch 8/200
Epoch 8: val_accuracy did not improve from 0.79455
Epoch 9/200
Epoch 9: val_accuracy did not improve from 0.79455
Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.79455
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.79455
Epoch 12/200
Epoch 12: val_accuracy improved from 0.79455 to 0.79874, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold1.hdf5
Epoch 13/200
Epoch 13: val_accuracy did not improve from 0.79874
Epoch 14/200
Epoch 14: val_accuracy did not improve from 0.79874
Epoch 15/200
Epoch 15: val_accuracy did not improve from 0.79874
Epoch 16/200
Epoch 16: val_accuracy did not improve from 0.79874
Epoch 17/200
Epoch 17: val_accuracy did not improve from 0.7987

Epoch 32/200
Epoch 32: val_accuracy did not improve from 0.80922
Epoch 33/200
Epoch 33: val_accuracy did not improve from 0.80922
Epoch 34/200
Epoch 34: val_accuracy did not improve from 0.80922
Epoch 35/200
Epoch 35: val_accuracy did not improve from 0.80922
Epoch 36/200
Epoch 36: val_accuracy did not improve from 0.80922
Epoch 37/200
Epoch 37: val_accuracy did not improve from 0.80922
Epoch 38/200
Epoch 38: val_accuracy did not improve from 0.80922
Epoch 39/200
Epoch 39: val_accuracy did not improve from 0.80922
Epoch 40/200
Epoch 40: val_accuracy did not improve from 0.80922
Epoch 41/200
Epoch 41: val_accuracy did not improve from 0.80922
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.80922
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.80922
Epoch 44/200
Epoch 44: val_accuracy did not improve from 0.80922
Epoch 45/200
Epoch 45: val_accuracy did not improve from 0.80922
Epoch 46/200
Epoch 46: val_accuracy did not improve from 0.80922
Epoch 47/200
Epoch 47: va

Epoch 61/200
Epoch 61: val_accuracy did not improve from 0.81551
Epoch 62/200
Epoch 62: val_accuracy did not improve from 0.81551
Epoch 63/200
Epoch 63: val_accuracy did not improve from 0.81551
Epoch 64/200
Epoch 64: val_accuracy did not improve from 0.81551
Epoch 65/200
Epoch 65: val_accuracy did not improve from 0.81551
Epoch 66/200
Epoch 66: val_accuracy did not improve from 0.81551
Epoch 67/200
Epoch 67: val_accuracy did not improve from 0.81551
Epoch 68/200
Epoch 68: val_accuracy did not improve from 0.81551
Epoch 69/200
Epoch 69: val_accuracy did not improve from 0.81551
Epoch 70/200
Epoch 70: val_accuracy did not improve from 0.81551
Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.81551
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.81551
Epoch 73/200
Epoch 73: val_accuracy did not improve from 0.81551
Epoch 74/200
Epoch 74: val_accuracy did not improve from 0.81551
Epoch 75/200
Epoch 75: val_accuracy did not improve from 0.81551
Epoch 76/200
Epoch 76: va

Epoch 91/200
Epoch 91: val_accuracy did not improve from 0.81551
Epoch 92/200
Epoch 92: val_accuracy did not improve from 0.81551
Epoch 93/200
Epoch 93: val_accuracy did not improve from 0.81551
Epoch 94/200
Epoch 94: val_accuracy did not improve from 0.81551
Epoch 95/200
Epoch 95: val_accuracy did not improve from 0.81551
Epoch 96/200
Epoch 96: val_accuracy did not improve from 0.81551
Epoch 97/200
Epoch 97: val_accuracy did not improve from 0.81551
Epoch 98/200
Epoch 98: val_accuracy did not improve from 0.81551
Epoch 99/200
Epoch 99: val_accuracy did not improve from 0.81551
Epoch 100/200
Epoch 100: val_accuracy did not improve from 0.81551
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.81551
Epoch 102/200
Epoch 102: val_accuracy did not improve from 0.81551
Epoch 103/200
Epoch 103: val_accuracy did not improve from 0.81551
Epoch 104/200
Epoch 104: val_accuracy did not improve from 0.81551
Epoch 105/200
Epoch 105: val_accuracy did not improve from 0.81551
Epoch 106/200

Epoch 120/200
Epoch 120: val_accuracy did not improve from 0.81551
Epoch 121/200
Epoch 121: val_accuracy did not improve from 0.81551
Epoch 122/200
Epoch 122: val_accuracy did not improve from 0.81551
Epoch 123/200
Epoch 123: val_accuracy did not improve from 0.81551
Epoch 124/200
Epoch 124: val_accuracy did not improve from 0.81551
Epoch 125/200
Epoch 125: val_accuracy did not improve from 0.81551
Epoch 126/200
Epoch 126: val_accuracy did not improve from 0.81551
Epoch 127/200
Epoch 127: val_accuracy did not improve from 0.81551
Epoch 128/200
Epoch 128: val_accuracy did not improve from 0.81551
Epoch 129/200
Epoch 129: val_accuracy did not improve from 0.81551
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.81551
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.81551
Epoch 132/200
Epoch 132: val_accuracy did not improve from 0.81551
Epoch 133/200
Epoch 133: val_accuracy did not improve from 0.81551
Epoch 134/200
Epoch 134: val_accuracy did not improve from 0.8

Epoch 149: val_accuracy did not improve from 0.81551
Epoch 150/200
Epoch 150: val_accuracy did not improve from 0.81551
Epoch 151/200
Epoch 151: val_accuracy did not improve from 0.81551
Epoch 152/200
Epoch 152: val_accuracy did not improve from 0.81551
Epoch 153/200
Epoch 153: val_accuracy did not improve from 0.81551
Epoch 154/200
Epoch 154: val_accuracy did not improve from 0.81551
Epoch 155/200
Epoch 155: val_accuracy did not improve from 0.81551
Epoch 156/200
Epoch 156: val_accuracy did not improve from 0.81551
Epoch 157/200
Epoch 157: val_accuracy did not improve from 0.81551
Epoch 158/200
Epoch 158: val_accuracy did not improve from 0.81551
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.81551
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.81551
Epoch 161/200
Epoch 161: val_accuracy did not improve from 0.81551
Epoch 162/200
Epoch 162: val_accuracy did not improve from 0.81551
Epoch 163/200
Epoch 163: val_accuracy did not improve from 0.81551
Epoch 164

Epoch 178: val_accuracy did not improve from 0.81551
Epoch 179/200
Epoch 179: val_accuracy did not improve from 0.81551
Epoch 180/200
Epoch 180: val_accuracy did not improve from 0.81551
Epoch 181/200
Epoch 181: val_accuracy did not improve from 0.81551
Epoch 182/200
Epoch 182: val_accuracy did not improve from 0.81551
Epoch 183/200
Epoch 183: val_accuracy did not improve from 0.81551
Epoch 184/200
Epoch 184: val_accuracy did not improve from 0.81551
Epoch 185/200
Epoch 185: val_accuracy did not improve from 0.81551
Epoch 186/200
Epoch 186: val_accuracy did not improve from 0.81551
Epoch 187/200
Epoch 187: val_accuracy did not improve from 0.81551
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.81551
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.81551
Epoch 190/200
Epoch 190: val_accuracy did not improve from 0.81551
Epoch 191/200
Epoch 191: val_accuracy did not improve from 0.81551
Epoch 192/200
Epoch 192: val_accuracy did not improve from 0.81551
Epoch 193

Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.77311
Epoch 8/200
Epoch 8: val_accuracy did not improve from 0.77311
Epoch 9/200
Epoch 9: val_accuracy did not improve from 0.77311
Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.77311
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.77311
Epoch 12/200
Epoch 12: val_accuracy did not improve from 0.77311
Epoch 13/200
Epoch 13: val_accuracy did not improve from 0.77311
Epoch 14/200
Epoch 14: val_accuracy did not improve from 0.77311
Epoch 15/200
Epoch 15: val_accuracy did not improve from 0.77311
Epoch 16/200
Epoch 16: val_accuracy did not improve from 0.77311
Epoch 17/200
Epoch 17: val_accuracy did not improve from 0.77311
Epoch 18/200
Epoch 18: val_accuracy improved from 0.77311 to 0.78361, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold2.hdf5
Epoch 19/200
Epoch 19: val_accuracy did not improve from 0.78361
Epoch 20/200
Epoch 20: val_accuracy did not improve from 

Epoch 36/200
Epoch 36: val_accuracy did not improve from 0.78992
Epoch 37/200
Epoch 37: val_accuracy did not improve from 0.78992
Epoch 38/200
Epoch 38: val_accuracy did not improve from 0.78992
Epoch 39/200
Epoch 39: val_accuracy did not improve from 0.78992
Epoch 40/200
Epoch 40: val_accuracy did not improve from 0.78992
Epoch 41/200
Epoch 41: val_accuracy did not improve from 0.78992
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.78992
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.78992
Epoch 44/200
Epoch 44: val_accuracy did not improve from 0.78992
Epoch 45/200
Epoch 45: val_accuracy improved from 0.78992 to 0.79622, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\bestModel-fold2.hdf5
Epoch 46/200
Epoch 46: val_accuracy did not improve from 0.79622
Epoch 47/200
Epoch 47: val_accuracy did not improve from 0.79622
Epoch 48/200
Epoch 48: val_accuracy did not improve from 0.79622
Epoch 49/200
Epoch 49: val_accuracy did not improve

Epoch 65/200
Epoch 65: val_accuracy did not improve from 0.79622
Epoch 66/200
Epoch 66: val_accuracy did not improve from 0.79622
Epoch 67/200
Epoch 67: val_accuracy did not improve from 0.79622
Epoch 68/200
Epoch 68: val_accuracy did not improve from 0.79622
Epoch 69/200
Epoch 69: val_accuracy did not improve from 0.79622
Epoch 70/200
Epoch 70: val_accuracy did not improve from 0.79622
Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.79622
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.79622
Epoch 73/200
Epoch 73: val_accuracy did not improve from 0.79622
Epoch 74/200
Epoch 74: val_accuracy did not improve from 0.79622
Epoch 75/200
Epoch 75: val_accuracy did not improve from 0.79622
Epoch 76/200
Epoch 76: val_accuracy did not improve from 0.79622
Epoch 77/200
Epoch 77: val_accuracy did not improve from 0.79622
Epoch 78/200
Epoch 78: val_accuracy did not improve from 0.79622
Epoch 79/200
Epoch 79: val_accuracy did not improve from 0.79622
Epoch 80/200
Epoch 80: va

Epoch 95/200
Epoch 95: val_accuracy did not improve from 0.79622
Epoch 96/200
Epoch 96: val_accuracy did not improve from 0.79622
Epoch 97/200
Epoch 97: val_accuracy did not improve from 0.79622
Epoch 98/200
Epoch 98: val_accuracy did not improve from 0.79622
Epoch 99/200
Epoch 99: val_accuracy did not improve from 0.79622
Epoch 100/200
Epoch 100: val_accuracy did not improve from 0.79622
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.79622
Epoch 102/200
Epoch 102: val_accuracy did not improve from 0.79622
Epoch 103/200
Epoch 103: val_accuracy did not improve from 0.79622
Epoch 104/200
Epoch 104: val_accuracy did not improve from 0.79622
Epoch 105/200
Epoch 105: val_accuracy did not improve from 0.79622
Epoch 106/200
Epoch 106: val_accuracy did not improve from 0.79622
Epoch 107/200
Epoch 107: val_accuracy did not improve from 0.79622
Epoch 108/200
Epoch 108: val_accuracy did not improve from 0.79622
Epoch 109/200
Epoch 109: val_accuracy did not improve from 0.79622
Epoch

Epoch 124/200
Epoch 124: val_accuracy did not improve from 0.79622
Epoch 125/200
Epoch 125: val_accuracy did not improve from 0.79622
Epoch 126/200
Epoch 126: val_accuracy did not improve from 0.79622
Epoch 127/200
Epoch 127: val_accuracy did not improve from 0.79622
Epoch 128/200
Epoch 128: val_accuracy did not improve from 0.79622
Epoch 129/200
Epoch 129: val_accuracy did not improve from 0.79622
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.79622
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.79622
Epoch 132/200
Epoch 132: val_accuracy did not improve from 0.79622
Epoch 133/200
Epoch 133: val_accuracy did not improve from 0.79622
Epoch 134/200
Epoch 134: val_accuracy did not improve from 0.79622
Epoch 135/200
Epoch 135: val_accuracy did not improve from 0.79622
Epoch 136/200
Epoch 136: val_accuracy did not improve from 0.79622
Epoch 137/200
Epoch 137: val_accuracy did not improve from 0.79622
Epoch 138/200
Epoch 138: val_accuracy did not improve from 0.7

Epoch 153: val_accuracy did not improve from 0.79622
Epoch 154/200
Epoch 154: val_accuracy did not improve from 0.79622
Epoch 155/200
Epoch 155: val_accuracy did not improve from 0.79622
Epoch 156/200
Epoch 156: val_accuracy did not improve from 0.79622
Epoch 157/200
Epoch 157: val_accuracy did not improve from 0.79622
Epoch 158/200
Epoch 158: val_accuracy did not improve from 0.79622
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.79622
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.79622
Epoch 161/200
Epoch 161: val_accuracy did not improve from 0.79622
Epoch 162/200
Epoch 162: val_accuracy did not improve from 0.79622
Epoch 163/200
Epoch 163: val_accuracy did not improve from 0.79622
Epoch 164/200
Epoch 164: val_accuracy did not improve from 0.79622
Epoch 165/200
Epoch 165: val_accuracy did not improve from 0.79622
Epoch 166/200
Epoch 166: val_accuracy did not improve from 0.79622
Epoch 167/200
Epoch 167: val_accuracy did not improve from 0.79622
Epoch 168

Epoch 182: val_accuracy did not improve from 0.79622
Epoch 183/200
Epoch 183: val_accuracy did not improve from 0.79622
Epoch 184/200
Epoch 184: val_accuracy did not improve from 0.79622
Epoch 185/200
Epoch 185: val_accuracy did not improve from 0.79622
Epoch 186/200
Epoch 186: val_accuracy did not improve from 0.79622
Epoch 187/200
Epoch 187: val_accuracy did not improve from 0.79622
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.79622
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.79622
Epoch 190/200
Epoch 190: val_accuracy did not improve from 0.79622
Epoch 191/200
Epoch 191: val_accuracy did not improve from 0.79622
Epoch 192/200
Epoch 192: val_accuracy did not improve from 0.79622
Epoch 193/200
Epoch 193: val_accuracy did not improve from 0.79622
Epoch 194/200
Epoch 194: val_accuracy did not improve from 0.79622
Epoch 195/200
Epoch 195: val_accuracy did not improve from 0.79622
Epoch 196/200
Epoch 196: val_accuracy did not improve from 0.79622
Epoch 197

Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.77731
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.77731
Epoch 12/200
Epoch 12: val_accuracy did not improve from 0.77731
Epoch 13/200
Epoch 13: val_accuracy did not improve from 0.77731
Epoch 14/200
Epoch 14: val_accuracy did not improve from 0.77731
Epoch 15/200
Epoch 15: val_accuracy did not improve from 0.77731
Epoch 16/200
Epoch 16: val_accuracy did not improve from 0.77731
Epoch 17/200
Epoch 17: val_accuracy did not improve from 0.77731
Epoch 18/200
Epoch 18: val_accuracy did not improve from 0.77731
Epoch 19/200
Epoch 19: val_accuracy did not improve from 0.77731
Epoch 20/200
Epoch 20: val_accuracy did not improve from 0.77731
Epoch 21/200
Epoch 21: val_accuracy did not improve from 0.77731
Epoch 22/200
Epoch 22: val_accuracy did not improve from 0.77731
Epoch 23/200
Epoch 23: val_accuracy did not improve from 0.77731
Epoch 24/200
Epoch 24: val_accuracy did not improve from 0.77731
Epoch 25/200
Epoch 25: va

Epoch 39: val_accuracy did not improve from 0.77731
Epoch 40/200
Epoch 40: val_accuracy did not improve from 0.77731
Epoch 41/200
Epoch 41: val_accuracy did not improve from 0.77731
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.77731
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.77731
Epoch 44/200
Epoch 44: val_accuracy did not improve from 0.77731
Epoch 45/200
Epoch 45: val_accuracy did not improve from 0.77731
Epoch 46/200
Epoch 46: val_accuracy did not improve from 0.77731
Epoch 47/200
Epoch 47: val_accuracy did not improve from 0.77731
Epoch 48/200
Epoch 48: val_accuracy did not improve from 0.77731
Epoch 49/200
Epoch 49: val_accuracy did not improve from 0.77731
Epoch 50/200
Epoch 50: val_accuracy did not improve from 0.77731
Epoch 51/200
Epoch 51: val_accuracy did not improve from 0.77731
Epoch 52/200
Epoch 52: val_accuracy did not improve from 0.77731
Epoch 53/200
Epoch 53: val_accuracy did not improve from 0.77731
Epoch 54/200
Epoch 54: val_accuracy di

Epoch 69/200
Epoch 69: val_accuracy did not improve from 0.77731
Epoch 70/200
Epoch 70: val_accuracy did not improve from 0.77731
Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.77731
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.77731
Epoch 73/200
Epoch 73: val_accuracy did not improve from 0.77731
Epoch 74/200
Epoch 74: val_accuracy did not improve from 0.77731
Epoch 75/200
Epoch 75: val_accuracy did not improve from 0.77731
Epoch 76/200
Epoch 76: val_accuracy did not improve from 0.77731
Epoch 77/200
Epoch 77: val_accuracy did not improve from 0.77731
Epoch 78/200
Epoch 78: val_accuracy did not improve from 0.77731
Epoch 79/200
Epoch 79: val_accuracy did not improve from 0.77731
Epoch 80/200
Epoch 80: val_accuracy did not improve from 0.77731
Epoch 81/200
Epoch 81: val_accuracy did not improve from 0.77731
Epoch 82/200
Epoch 82: val_accuracy did not improve from 0.77731
Epoch 83/200
Epoch 83: val_accuracy did not improve from 0.77731
Epoch 84/200
Epoch 84: va

Epoch 98: val_accuracy did not improve from 0.77731
Epoch 99/200
Epoch 99: val_accuracy did not improve from 0.77731
Epoch 100/200
Epoch 100: val_accuracy did not improve from 0.77731
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.77731
Epoch 102/200
Epoch 102: val_accuracy did not improve from 0.77731
Epoch 103/200
Epoch 103: val_accuracy did not improve from 0.77731
Epoch 104/200
Epoch 104: val_accuracy did not improve from 0.77731
Epoch 105/200
Epoch 105: val_accuracy did not improve from 0.77731
Epoch 106/200
Epoch 106: val_accuracy did not improve from 0.77731
Epoch 107/200
Epoch 107: val_accuracy did not improve from 0.77731
Epoch 108/200
Epoch 108: val_accuracy did not improve from 0.77731
Epoch 109/200
Epoch 109: val_accuracy did not improve from 0.77731
Epoch 110/200
Epoch 110: val_accuracy did not improve from 0.77731
Epoch 111/200
Epoch 111: val_accuracy did not improve from 0.77731
Epoch 112/200
Epoch 112: val_accuracy did not improve from 0.77731
Epoch 113/20

Epoch 127: val_accuracy did not improve from 0.77731
Epoch 128/200
Epoch 128: val_accuracy did not improve from 0.77731
Epoch 129/200
Epoch 129: val_accuracy did not improve from 0.77731
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.77731
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.77731
Epoch 132/200
Epoch 132: val_accuracy did not improve from 0.77731
Epoch 133/200
Epoch 133: val_accuracy did not improve from 0.77731
Epoch 134/200
Epoch 134: val_accuracy did not improve from 0.77731
Epoch 135/200
Epoch 135: val_accuracy did not improve from 0.77731
Epoch 136/200
Epoch 136: val_accuracy did not improve from 0.77731
Epoch 137/200
Epoch 137: val_accuracy did not improve from 0.77731
Epoch 138/200
Epoch 138: val_accuracy did not improve from 0.77731
Epoch 139/200
Epoch 139: val_accuracy did not improve from 0.77731
Epoch 140/200
Epoch 140: val_accuracy did not improve from 0.77731
Epoch 141/200
Epoch 141: val_accuracy did not improve from 0.77731
Epoch 142

Epoch 156: val_accuracy did not improve from 0.77731
Epoch 157/200
Epoch 157: val_accuracy did not improve from 0.77731
Epoch 158/200
Epoch 158: val_accuracy did not improve from 0.77731
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.77731
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.77731
Epoch 161/200
Epoch 161: val_accuracy did not improve from 0.77731
Epoch 162/200
Epoch 162: val_accuracy did not improve from 0.77731
Epoch 163/200
Epoch 163: val_accuracy did not improve from 0.77731
Epoch 164/200
Epoch 164: val_accuracy did not improve from 0.77731
Epoch 165/200
Epoch 165: val_accuracy did not improve from 0.77731
Epoch 166/200
Epoch 166: val_accuracy did not improve from 0.77731
Epoch 167/200
Epoch 167: val_accuracy did not improve from 0.77731
Epoch 168/200
Epoch 168: val_accuracy did not improve from 0.77731
Epoch 169/200
Epoch 169: val_accuracy did not improve from 0.77731
Epoch 170/200
Epoch 170: val_accuracy did not improve from 0.77731
Epoch 171

Epoch 185: val_accuracy did not improve from 0.77731
Epoch 186/200
Epoch 186: val_accuracy did not improve from 0.77731
Epoch 187/200
Epoch 187: val_accuracy did not improve from 0.77731
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.77731
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.77731
Epoch 190/200
Epoch 190: val_accuracy did not improve from 0.77731
Epoch 191/200
Epoch 191: val_accuracy did not improve from 0.77731
Epoch 192/200
Epoch 192: val_accuracy did not improve from 0.77731
Epoch 193/200
Epoch 193: val_accuracy did not improve from 0.77731
Epoch 194/200
Epoch 194: val_accuracy did not improve from 0.77731
Epoch 195/200
Epoch 195: val_accuracy did not improve from 0.77731
Epoch 196/200
Epoch 196: val_accuracy did not improve from 0.77731
Epoch 197/200
Epoch 197: val_accuracy did not improve from 0.77731
Epoch 198/200
Epoch 198: val_accuracy did not improve from 0.77731
Epoch 199/200
Epoch 199: val_accuracy did not improve from 0.77731
Epoch 200

Epoch 13/200
Epoch 13: val_accuracy did not improve from 0.80882
Epoch 14/200
Epoch 14: val_accuracy did not improve from 0.80882
Epoch 15/200
Epoch 15: val_accuracy did not improve from 0.80882
Epoch 16/200
Epoch 16: val_accuracy did not improve from 0.80882
Epoch 17/200
Epoch 17: val_accuracy did not improve from 0.80882
Epoch 18/200
Epoch 18: val_accuracy did not improve from 0.80882
Epoch 19/200
Epoch 19: val_accuracy did not improve from 0.80882
Epoch 20/200
Epoch 20: val_accuracy did not improve from 0.80882
Epoch 21/200
Epoch 21: val_accuracy did not improve from 0.80882
Epoch 22/200
Epoch 22: val_accuracy did not improve from 0.80882
Epoch 23/200
Epoch 23: val_accuracy did not improve from 0.80882
Epoch 24/200
Epoch 24: val_accuracy did not improve from 0.80882
Epoch 25/200
Epoch 25: val_accuracy did not improve from 0.80882
Epoch 26/200
Epoch 26: val_accuracy did not improve from 0.80882
Epoch 27/200
Epoch 27: val_accuracy did not improve from 0.80882
Epoch 28/200
Epoch 28: va

Epoch 41: val_accuracy did not improve from 0.82143
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.82143
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.82143
Epoch 44/200
Epoch 44: val_accuracy did not improve from 0.82143
Epoch 45/200
Epoch 45: val_accuracy did not improve from 0.82143
Epoch 46/200
Epoch 46: val_accuracy did not improve from 0.82143
Epoch 47/200
Epoch 47: val_accuracy did not improve from 0.82143
Epoch 48/200
Epoch 48: val_accuracy did not improve from 0.82143
Epoch 49/200
Epoch 49: val_accuracy did not improve from 0.82143
Epoch 50/200
Epoch 50: val_accuracy did not improve from 0.82143
Epoch 51/200
Epoch 51: val_accuracy did not improve from 0.82143
Epoch 52/200
Epoch 52: val_accuracy did not improve from 0.82143
Epoch 53/200
Epoch 53: val_accuracy did not improve from 0.82143
Epoch 54/200
Epoch 54: val_accuracy did not improve from 0.82143
Epoch 55/200
Epoch 55: val_accuracy did not improve from 0.82143
Epoch 56/200
Epoch 56: val_accuracy di

Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.82143
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.82143
Epoch 73/200
Epoch 73: val_accuracy did not improve from 0.82143
Epoch 74/200
Epoch 74: val_accuracy did not improve from 0.82143
Epoch 75/200
Epoch 75: val_accuracy did not improve from 0.82143
Epoch 76/200
Epoch 76: val_accuracy did not improve from 0.82143
Epoch 77/200
Epoch 77: val_accuracy did not improve from 0.82143
Epoch 78/200
Epoch 78: val_accuracy did not improve from 0.82143
Epoch 79/200
Epoch 79: val_accuracy did not improve from 0.82143
Epoch 80/200
Epoch 80: val_accuracy did not improve from 0.82143
Epoch 81/200
Epoch 81: val_accuracy did not improve from 0.82143
Epoch 82/200
Epoch 82: val_accuracy did not improve from 0.82143
Epoch 83/200
Epoch 83: val_accuracy did not improve from 0.82143
Epoch 84/200
Epoch 84: val_accuracy did not improve from 0.82143
Epoch 85/200
Epoch 85: val_accuracy did not improve from 0.82143
Epoch 86/200
Epoch 86: va

Epoch 100: val_accuracy did not improve from 0.82143
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.82143
Epoch 102/200
Epoch 102: val_accuracy did not improve from 0.82143
Epoch 103/200
Epoch 103: val_accuracy did not improve from 0.82143
Epoch 104/200
Epoch 104: val_accuracy did not improve from 0.82143
Epoch 105/200
Epoch 105: val_accuracy did not improve from 0.82143
Epoch 106/200
Epoch 106: val_accuracy did not improve from 0.82143
Epoch 107/200
Epoch 107: val_accuracy did not improve from 0.82143
Epoch 108/200
Epoch 108: val_accuracy did not improve from 0.82143
Epoch 109/200
Epoch 109: val_accuracy did not improve from 0.82143
Epoch 110/200
Epoch 110: val_accuracy did not improve from 0.82143
Epoch 111/200
Epoch 111: val_accuracy did not improve from 0.82143
Epoch 112/200
Epoch 112: val_accuracy did not improve from 0.82143
Epoch 113/200
Epoch 113: val_accuracy did not improve from 0.82143
Epoch 114/200
Epoch 114: val_accuracy did not improve from 0.82143
Epoch 115

Epoch 129: val_accuracy did not improve from 0.82143
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.82143
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.82143
Epoch 132/200
Epoch 132: val_accuracy did not improve from 0.82143
Epoch 133/200
Epoch 133: val_accuracy did not improve from 0.82143
Epoch 134/200
Epoch 134: val_accuracy did not improve from 0.82143
Epoch 135/200
Epoch 135: val_accuracy did not improve from 0.82143
Epoch 136/200
Epoch 136: val_accuracy did not improve from 0.82143
Epoch 137/200
Epoch 137: val_accuracy did not improve from 0.82143
Epoch 138/200
Epoch 138: val_accuracy did not improve from 0.82143
Epoch 139/200
Epoch 139: val_accuracy did not improve from 0.82143
Epoch 140/200
Epoch 140: val_accuracy did not improve from 0.82143
Epoch 141/200
Epoch 141: val_accuracy did not improve from 0.82143
Epoch 142/200
Epoch 142: val_accuracy did not improve from 0.82143
Epoch 143/200
Epoch 143: val_accuracy did not improve from 0.82143
Epoch 144

Epoch 158: val_accuracy did not improve from 0.82143
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.82143
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.82143
Epoch 161/200
Epoch 161: val_accuracy did not improve from 0.82143
Epoch 162/200
Epoch 162: val_accuracy did not improve from 0.82143
Epoch 163/200
Epoch 163: val_accuracy did not improve from 0.82143
Epoch 164/200
Epoch 164: val_accuracy did not improve from 0.82143
Epoch 165/200
Epoch 165: val_accuracy did not improve from 0.82143
Epoch 166/200
Epoch 166: val_accuracy did not improve from 0.82143
Epoch 167/200
Epoch 167: val_accuracy did not improve from 0.82143
Epoch 168/200
Epoch 168: val_accuracy did not improve from 0.82143
Epoch 169/200
Epoch 169: val_accuracy did not improve from 0.82143
Epoch 170/200
Epoch 170: val_accuracy did not improve from 0.82143
Epoch 171/200
Epoch 171: val_accuracy did not improve from 0.82143
Epoch 172/200
Epoch 172: val_accuracy did not improve from 0.82143
Epoch 173

Epoch 187: val_accuracy did not improve from 0.82143
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.82143
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.82143
Epoch 190/200
Epoch 190: val_accuracy did not improve from 0.82143
Epoch 191/200
Epoch 191: val_accuracy did not improve from 0.82143
Epoch 192/200
Epoch 192: val_accuracy did not improve from 0.82143
Epoch 193/200
Epoch 193: val_accuracy did not improve from 0.82143
Epoch 194/200
Epoch 194: val_accuracy did not improve from 0.82143
Epoch 195/200
Epoch 195: val_accuracy did not improve from 0.82143
Epoch 196/200
Epoch 196: val_accuracy did not improve from 0.82143
Epoch 197/200
Epoch 197: val_accuracy did not improve from 0.82143
Epoch 198/200
Epoch 198: val_accuracy did not improve from 0.82143
Epoch 199/200
Epoch 199: val_accuracy did not improve from 0.82143
Epoch 200/200
Epoch 200: val_accuracy did not improve from 0.82143


## k-fold Training evaluation

In [17]:
evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,0.800166,0.810883,0.859428,0.787588,0.812689,0.602598
Train,0.987931,0.985718,0.998322,0.990345,0.985518,0.975941


In [18]:
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.804782	0.822802	0.861915	0.777494	0.832056	0.610976
# Train	0.991184	0.993261	0.997514	0.989086	0.993283	0.982392

In [19]:
evaluations_df.groupby(["Train_Test"]).std().filter(['Accuracy', 
                                                       'Precision', 
                                                       'AUC', 
                                                       'Sensitivity', 
                                                       'Specificity', 
                                                       'MCC'])

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,0.018178,0.04284,0.022797,0.04134,0.055795,0.038056
Train,0.007456,0.012992,0.000739,0.004536,0.013383,0.014792


In [20]:
# small 8,2 - 200,16
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.798895	0.802586	0.872018	0.792578	0.805204	0.597894
# Train	0.995382	0.996426	0.999765	0.994333	0.996431	0.990768

In [21]:
# small 16, 3
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.787978	0.796841	0.865977	0.773267	0.802676	0.576380
# Train	0.995697	0.996009	0.999902	0.995382	0.996012	0.991395

In [22]:
# small 8, 2
# Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.795976	0.807037	0.866398	0.778376	0.813600	0.592587
# Train	0.995697	0.996638	0.999890	0.994752	0.996642	0.991400

In [23]:
evaluations_df

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,0,Train,0.995276,0.996839,"[0.0, 0.0021008403361344537, 0.004201680672268...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.9999999, 0.9999999, 0.99999976, 0.9999993, ...",0.998789,0.993697,0.996852,0.990556
1,0,Test,0.790356,0.803493,"[0.0, 0.0041841004184100415, 0.012552301255230...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9999998, 0.99999976, 0.9999958, 0.99999285,...",0.834289,0.769874,0.810924,0.581259
2,1,Train,0.987402,0.991534,"[0.0, 0.001049317943336831, 0.0031479538300104...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.0, 1.0, 0.9999999, 0.9999994, 0.99999833, 0...",0.997115,0.983211,0.991597,0.974838
3,1,Test,0.815514,0.878788,"[0.0, 0.004201680672268907, 0.0714285714285714...","[0.0, 0.0, 0.0, 0.0041841004184100415, 0.00418...","[2.0, 1.0, 0.99970067, 0.99963355, 0.99957556,...",0.881254,0.731092,0.899582,0.639967
4,2,Train,0.992655,0.992655,"[0.0, 0.00944386149003148, 0.01678908709338929...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.0, 1.0, 0.9999999, 0.99999976, 0.99999964, ...",0.998566,0.992655,0.992655,0.98531
5,2,Test,0.796218,0.802575,"[0.0, 0.008403361344537815, 0.0252100840336134...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[2.0, 1.0, 0.99999976, 0.9999995, 0.9999554, 0...",0.847821,0.785714,0.806723,0.592568
6,3,Train,0.975866,0.964176,"[0.0, 0.001049317943336831, 0.0115424973767051...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.0, 1.0, 0.99999344, 0.9999927, 0.9999895, 0...",0.998164,0.988458,0.963274,0.952033
7,3,Test,0.777311,0.759843,"[0.0, 0.004201680672268907, 0.1386554621848739...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9999993, 0.9999993, 0.9985587, 0.998487, 0....",0.847857,0.810924,0.743697,0.555879
8,4,Train,0.988458,0.983385,"[0.0, 0.001049317943336831, 0.0031479538300104...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.0, 1.0, 0.99999976, 0.9999993, 0.99999917, ...",0.998975,0.993704,0.983211,0.976969
9,4,Test,0.821429,0.809717,"[0.0, 0.004201680672268907, 0.1176470588235294...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9999987, 0.9999987, 0.9996489, 0.9996474, 0...",0.885919,0.840336,0.802521,0.643317


In [24]:
# small 8, 2
# 	Fold	Train_Test	Accuracy	Precision	TPR	FPR	TPR_FPR_Thresholds	AUC	Sensitivity	Specificity	MCC
# 0	0	Train	0.994751	0.994748	[0.0, 0.0010504201680672268, 0.103991596638655...	[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...	[1.9849311, 0.9849311, 0.92211884, 0.92180663,...	0.999930	0.994748	0.994753	0.989501
# 1	0	Test	0.771488	0.795455	[0.0, 0.0041841004184100415, 0.020920502092050...	[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...	[1.9901093, 0.9901093, 0.9751286, 0.9726768, 0...	0.857600	0.732218	0.810924	0.544782
# 2	1	Train	0.994751	0.997888	[0.0, 0.001049317943336831, 0.1248688352570829...	[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...	[1.9553037, 0.9553036, 0.89843416, 0.89823747,...	0.999837	0.991605	0.997899	0.989521
# 3	1	Test	0.805031	0.811159	[0.0, 0.004201680672268907, 0.0462184873949579...	[0.0, 0.0, 0.0, 0.0041841004184100415, 0.00418...	[1.9966587, 0.9966587, 0.96096516, 0.96064854,...	0.885157	0.794118	0.815900	0.610178
# 4	2	Train	0.997901	0.996859	[0.0, 0.001049317943336831, 0.1101783840503672...	[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...	[1.9665295, 0.9665295, 0.92234534, 0.9217606, ...	0.999968	0.998951	0.996852	0.995805
# 5	2	Test	0.779412	0.775934	[0.0, 0.004201680672268907, 0.0546218487394958...	[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...	[1.9883118, 0.9883118, 0.957935, 0.9533922, 0....	0.838580	0.785714	0.773109	0.558868
# 6	3	Train	0.995803	0.995803	[0.0, 0.001049317943336831, 0.1185729275970619...	[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...	[1.9773536, 0.9773536, 0.93449837, 0.9342757, ...	0.999827	0.995803	0.995803	0.991605
# 7	3	Test	0.836134	0.850877	[0.0, 0.004201680672268907, 0.0378151260504201...	[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...	[1.986332, 0.98633206, 0.97150844, 0.97079784,...	0.889556	0.815126	0.857143	0.672863
# 8	4	Train	0.995278	0.997890	[0.0, 0.001049317943336831, 0.1049317943336831...	[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...	[1.961971, 0.961971, 0.9118028, 0.9117352, 0.9...	0.999890	0.992655	0.997901	0.990570
# 9	4	Test	0.787815	0.801762	[0.0, 0.004201680672268907, 0.0504201680672268...	[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...	[1.9933276, 0.9933276, 0.9593585, 0.95925444, ...	0.861097	0.764706	0.810924	0.576246

# Independent data

In [25]:
##################################################################################
##### Read CSV data
##################################################################################

indpe_data_filepath = os.path.join(input_data_folder, indpe_data_filename)
indpe_data = pd.read_csv(indpe_data_filepath, sep=',', header=0)
indpe_data = indpe_data.drop('label', axis=1)

indpe_data['label'] = pd.Series([int(val.split('_')[-2])
                                 for val in indpe_data['nameseq']])

indpe_data = indpe_data.drop('nameseq', axis=1)

##################################################################################
##### Extract features and labels, create folds
##################################################################################

indpe_features = np.array(indpe_data.drop('label', axis=1))
indpe_labels = np.array(indpe_data['label'])
indpe_labels = indpe_labels.reshape((indpe_labels.shape[0], 1))

## Using k-fold Models

### Performance of each k-fold model

In [26]:
## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    label_pred = pred2label(y_pred)

    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(indpe_labels, label_pred)
    prec = precision_score(indpe_labels,label_pred)
    mcc = matthews_corrcoef(indpe_labels, label_pred)

    conf = confusion_matrix(indpe_labels, label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
    auc = roc_auc_score(indpe_labels, y_pred)

    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Independent")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)
    
    del model
    tf.keras.backend.clear_session()

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.586939,0.225012,0.638483,0.609852,0.582387,0.144085


In [27]:
evaluations_df

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,0,Independent,0.595102,0.223062,"[0.0, 0.0049261083743842365, 0.019704433497536...","[0.0, 0.0, 0.0, 0.0009784735812133072, 0.00097...","[1.9999998, 0.99999976, 0.9999993, 0.9999963, ...",0.640777,0.581281,0.597847,0.134464
1,1,Independent,0.619592,0.22547,"[0.0, 0.009852216748768473, 0.0147783251231527...","[0.0, 0.0, 0.0, 0.0029354207436399216, 0.00293...","[1.9999999, 0.9999999, 0.99999976, 0.9999982, ...",0.621634,0.53202,0.636986,0.128777
2,2,Independent,0.600816,0.233209,"[0.0, 0.019704433497536946, 0.0246305418719211...","[0.0, 0.0, 0.0009784735812133072, 0.0009784735...","[2.0, 1.0, 0.9999999, 0.99999976, 0.9999995, 0...",0.637189,0.615764,0.597847,0.160105
3,3,Independent,0.563265,0.226073,"[0.0, 0.0049261083743842365, 0.024630541871921...","[0.0, 0.0, 0.0, 0.004892367906066536, 0.006849...","[1.9999994, 0.9999994, 0.99999154, 0.99998355,...",0.65496,0.674877,0.541096,0.160617
4,4,Independent,0.555918,0.217247,"[0.0, 0.0049261083743842365, 0.009852216748768...","[0.0, 0.0, 0.0, 0.0, 0.0009784735812133072, 0....","[1.9999996, 0.99999964, 0.99999917, 0.9999989,...",0.637854,0.64532,0.53816,0.136461


### Mean score with k-fold models

In [28]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    total_pred += y_pred
    all_preds.append(y_pred)
    
    del model
    tf.keras.backend.clear_session()
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.601633,0.231638,0.647562,0.605911,0.600783,0.155087


### Voting score with k-fold models

In [29]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    vote_pred = pred2label(y_pred)
    total_pred += vote_pred
    all_preds.append(vote_pred)
    
    del model
    tf.keras.backend.clear_session()
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.596735,0.231054,0.625382,0.615764,0.592955,0.156282


## Using New Model

Train model on full data from training. Predict and evaluate on Independent data.

In [30]:
model = DLNN_Classifier(input_vec_shape = input_vec_shape)
    
## Define the model callbacks for early stopping and saving the model. Then train model
current_model_path = os.path.join(modelPath, "_fullModel.hdf5")
modelCallbacks = [
    tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                       monitor = monitor, verbose = 1, save_best_only = True, 
                                       save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
]

# adding random shuffling of the dataset for training purpose
index_arr = np.arange(train_features.shape[0])
index_arr = np.random.permutation(index_arr)

model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
          callbacks = modelCallbacks, validation_data = (indpe_features, indpe_labels))
# model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
#           callbacks = modelCallbacks, validation_split = 0.2)

del model
tf.keras.backend.clear_session()

model = tf.keras.models.load_model(current_model_path)

Epoch 1/200
Epoch 1: val_accuracy improved from -inf to 0.54612, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\_fullModel.hdf5
Epoch 2/200
Epoch 2: val_accuracy improved from 0.54612 to 0.71347, saving model to Results\NT_Site_PredNTS_Classification_DLNN_Kmer\5fold\models\_fullModel.hdf5
Epoch 3/200
Epoch 3: val_accuracy did not improve from 0.71347
Epoch 4/200
Epoch 4: val_accuracy did not improve from 0.71347
Epoch 5/200
Epoch 5: val_accuracy did not improve from 0.71347
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.71347
Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.71347
Epoch 8/200
Epoch 8: val_accuracy did not improve from 0.71347
Epoch 9/200
Epoch 9: val_accuracy did not improve from 0.71347
Epoch 10/200
Epoch 10: val_accuracy did not improve from 0.71347
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.71347
Epoch 12/200
Epoch 12: val_accuracy did not improve from 0.71347
Epoch 13/200
Epoch 13: val_accuracy did not

Epoch 30/200
Epoch 30: val_accuracy did not improve from 0.71347
Epoch 31/200
Epoch 31: val_accuracy did not improve from 0.71347
Epoch 32/200
Epoch 32: val_accuracy did not improve from 0.71347
Epoch 33/200
Epoch 33: val_accuracy did not improve from 0.71347
Epoch 34/200
Epoch 34: val_accuracy did not improve from 0.71347
Epoch 35/200
Epoch 35: val_accuracy did not improve from 0.71347
Epoch 36/200
Epoch 36: val_accuracy did not improve from 0.71347
Epoch 37/200
Epoch 37: val_accuracy did not improve from 0.71347
Epoch 38/200
Epoch 38: val_accuracy did not improve from 0.71347
Epoch 39/200
Epoch 39: val_accuracy did not improve from 0.71347
Epoch 40/200
Epoch 40: val_accuracy did not improve from 0.71347
Epoch 41/200
Epoch 41: val_accuracy did not improve from 0.71347
Epoch 42/200
Epoch 42: val_accuracy did not improve from 0.71347
Epoch 43/200
Epoch 43: val_accuracy did not improve from 0.71347
Epoch 44/200
Epoch 44: val_accuracy did not improve from 0.71347
Epoch 45/200
Epoch 45: va

Epoch 59: val_accuracy did not improve from 0.71755
Epoch 60/200
Epoch 60: val_accuracy did not improve from 0.71755
Epoch 61/200
Epoch 61: val_accuracy did not improve from 0.71755
Epoch 62/200
Epoch 62: val_accuracy did not improve from 0.71755
Epoch 63/200
Epoch 63: val_accuracy did not improve from 0.71755
Epoch 64/200
Epoch 64: val_accuracy did not improve from 0.71755
Epoch 65/200
Epoch 65: val_accuracy did not improve from 0.71755
Epoch 66/200
Epoch 66: val_accuracy did not improve from 0.71755
Epoch 67/200
Epoch 67: val_accuracy did not improve from 0.71755
Epoch 68/200
Epoch 68: val_accuracy did not improve from 0.71755
Epoch 69/200
Epoch 69: val_accuracy did not improve from 0.71755
Epoch 70/200
Epoch 70: val_accuracy did not improve from 0.71755
Epoch 71/200
Epoch 71: val_accuracy did not improve from 0.71755
Epoch 72/200
Epoch 72: val_accuracy did not improve from 0.71755
Epoch 73/200
Epoch 73: val_accuracy did not improve from 0.71755
Epoch 74/200
Epoch 74: val_accuracy di

Epoch 89/200
Epoch 89: val_accuracy did not improve from 0.71755
Epoch 90/200
Epoch 90: val_accuracy did not improve from 0.71755
Epoch 91/200
Epoch 91: val_accuracy did not improve from 0.71755
Epoch 92/200
Epoch 92: val_accuracy did not improve from 0.71755
Epoch 93/200
Epoch 93: val_accuracy did not improve from 0.71755
Epoch 94/200
Epoch 94: val_accuracy did not improve from 0.71755
Epoch 95/200
Epoch 95: val_accuracy did not improve from 0.71755
Epoch 96/200
Epoch 96: val_accuracy did not improve from 0.71755
Epoch 97/200
Epoch 97: val_accuracy did not improve from 0.71755
Epoch 98/200
Epoch 98: val_accuracy did not improve from 0.71755
Epoch 99/200
Epoch 99: val_accuracy did not improve from 0.71755
Epoch 100/200
Epoch 100: val_accuracy did not improve from 0.71755
Epoch 101/200
Epoch 101: val_accuracy did not improve from 0.71755
Epoch 102/200
Epoch 102: val_accuracy did not improve from 0.71755
Epoch 103/200
Epoch 103: val_accuracy did not improve from 0.71755
Epoch 104/200
Epo

Epoch 118/200
Epoch 118: val_accuracy did not improve from 0.72082
Epoch 119/200
Epoch 119: val_accuracy did not improve from 0.72082
Epoch 120/200
Epoch 120: val_accuracy did not improve from 0.72082
Epoch 121/200
Epoch 121: val_accuracy did not improve from 0.72082
Epoch 122/200
Epoch 122: val_accuracy did not improve from 0.72082
Epoch 123/200
Epoch 123: val_accuracy did not improve from 0.72082
Epoch 124/200
Epoch 124: val_accuracy did not improve from 0.72082
Epoch 125/200
Epoch 125: val_accuracy did not improve from 0.72082
Epoch 126/200
Epoch 126: val_accuracy did not improve from 0.72082
Epoch 127/200
Epoch 127: val_accuracy did not improve from 0.72082
Epoch 128/200
Epoch 128: val_accuracy did not improve from 0.72082
Epoch 129/200
Epoch 129: val_accuracy did not improve from 0.72082
Epoch 130/200
Epoch 130: val_accuracy did not improve from 0.72082
Epoch 131/200
Epoch 131: val_accuracy did not improve from 0.72082
Epoch 132/200
Epoch 132: val_accuracy did not improve from 0.7

Epoch 147/200
Epoch 147: val_accuracy did not improve from 0.72082
Epoch 148/200
Epoch 148: val_accuracy did not improve from 0.72082
Epoch 149/200
Epoch 149: val_accuracy did not improve from 0.72082
Epoch 150/200
Epoch 150: val_accuracy did not improve from 0.72082
Epoch 151/200
Epoch 151: val_accuracy did not improve from 0.72082
Epoch 152/200
Epoch 152: val_accuracy did not improve from 0.72082
Epoch 153/200
Epoch 153: val_accuracy did not improve from 0.72082
Epoch 154/200
Epoch 154: val_accuracy did not improve from 0.72082
Epoch 155/200
Epoch 155: val_accuracy did not improve from 0.72082
Epoch 156/200
Epoch 156: val_accuracy did not improve from 0.72082
Epoch 157/200
Epoch 157: val_accuracy did not improve from 0.72082
Epoch 158/200
Epoch 158: val_accuracy did not improve from 0.72082
Epoch 159/200
Epoch 159: val_accuracy did not improve from 0.72082
Epoch 160/200
Epoch 160: val_accuracy did not improve from 0.72082
Epoch 161/200
Epoch 161: val_accuracy did not improve from 0.7

Epoch 176: val_accuracy did not improve from 0.72082
Epoch 177/200
Epoch 177: val_accuracy did not improve from 0.72082
Epoch 178/200
Epoch 178: val_accuracy did not improve from 0.72082
Epoch 179/200
Epoch 179: val_accuracy did not improve from 0.72082
Epoch 180/200
Epoch 180: val_accuracy did not improve from 0.72082
Epoch 181/200
Epoch 181: val_accuracy did not improve from 0.72082
Epoch 182/200
Epoch 182: val_accuracy did not improve from 0.72082
Epoch 183/200
Epoch 183: val_accuracy did not improve from 0.72082
Epoch 184/200
Epoch 184: val_accuracy did not improve from 0.72082
Epoch 185/200
Epoch 185: val_accuracy did not improve from 0.72082
Epoch 186/200
Epoch 186: val_accuracy did not improve from 0.72082
Epoch 187/200
Epoch 187: val_accuracy did not improve from 0.72082
Epoch 188/200
Epoch 188: val_accuracy did not improve from 0.72082
Epoch 189/200
Epoch 189: val_accuracy did not improve from 0.72082
Epoch 190/200
Epoch 190: val_accuracy did not improve from 0.72082
Epoch 191

In [31]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

y_pred = model.predict(indpe_features)
label_pred = pred2label(y_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
auc = roc_auc_score(indpe_labels, y_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

del model
tf.keras.backend.clear_session()

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.720816,0.287462,0.641512,0.463054,0.772016,0.197587


In [32]:
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Independent	0.701224	0.26513	0.656614	0.453202	0.750489	0.168087

In [33]:
print(classification_report(indpe_labels, np.round(y_pred).astype(int)))

              precision    recall  f1-score   support

           0       0.88      0.77      0.82      1022
           1       0.29      0.46      0.35       203

    accuracy                           0.72      1225
   macro avg       0.58      0.62      0.59      1225
weighted avg       0.78      0.72      0.74      1225



In [34]:
# 2048
# 5
# 0.001

#               precision    recall  f1-score   support

#            0       0.89      0.72      0.79      1022
#            1       0.28      0.54      0.36       203

#     accuracy                           0.69      1225
#    macro avg       0.58      0.63      0.58      1225
# weighted avg       0.79      0.69      0.72      1225

In [35]:
# 2048
# 5

#               precision    recall  f1-score   support

#            0       0.88      0.78      0.83      1022
#            1       0.29      0.45      0.35       203

#     accuracy                           0.73      1225
#    macro avg       0.58      0.62      0.59      1225
# weighted avg       0.78      0.73      0.75      1225

In [36]:
# 1024
# 5

# Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Independent	0.682449	0.259067	0.633333	0.492611	0.720157	0.170296

#               precision    recall  f1-score   support

#            0       0.88      0.72      0.79      1022
#            1       0.26      0.49      0.34       203

#     accuracy                           0.68      1225
#    macro avg       0.57      0.61      0.57      1225
# weighted avg       0.77      0.68      0.72      1225

In [37]:
# 256
# 6
# precision    recall  f1-score   support

#            0       0.88      0.68      0.77      1022
#            1       0.24      0.52      0.33       203

#     accuracy                           0.65      1225
#    macro avg       0.56      0.60      0.55      1225
# weighted avg       0.77      0.65      0.69      1225

In [38]:
# 128
# 5
#               precision    recall  f1-score   support

#            0       0.88      0.62      0.73      1022
#            1       0.23      0.59      0.34       203

#     accuracy                           0.61      1225
#    macro avg       0.56      0.60      0.53      1225
# weighted avg       0.78      0.61      0.66      1225