In [1]:
##################################################################################
##### Define all parameters for model tuning
##################################################################################

n_fold = 5
expName = "NT_Site_PredNTS_Classification_DLNN_CORENup"
outPath = "Results"
foldName = "folds.pickle"

epochs = 100
batch_size = 64
shuffle = True
seed = None

input_data_folder = "Data"
training_data_file = "Training-datasets-PredNTS.txt"
independent_data_file = "independent dataset-PredNTS.txt"

In [2]:
import os
import pickle
import numpy as np
import pandas as pd

# import tensorflow as tf

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, confusion_matrix
from sklearn.metrics import roc_auc_score, matthews_corrcoef

import math

In [3]:
# print(tf.test.is_gpu_available(cuda_only=True))
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

NameError: name 'tf' is not defined

In [4]:
##################################################################################
##### define all CUSTOM functions
##################################################################################

def one_hot_encode_nt(sequence, char_dict):
    
    seq_encoded = np.zeros((len(sequence),len(char_dict)))
    
    i = 0
    for single_character in sequence:
        if(single_character.upper() in char_dict.keys()):
            seq_encoded[i][char_dict[single_character.upper()]] = 1
            i = i+1
        else:
            raise ValueError('Incorrect character in NT sequence: '+sequence)
    return seq_encoded

In [5]:
##################################################################################
##### Build k-fold functions
##################################################################################

## Build the K-fold from dataset
def build_kfold(features, labels, k=10, shuffle=False, seed=None):
    
    skf = StratifiedKFold(n_splits=k, shuffle=shuffle, random_state=seed)
    kfoldList = []
    for train_index, test_index in skf.split(features, labels):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        kfoldList.append({
            "X_train": X_train,
            "X_test": X_test,
            "y_train":y_train,
            "y_test":y_test
        })
    return kfoldList

In [6]:
##################################################################################
##### define evaluator functions
##################################################################################

def pred2label(y_pred):
    y_pred = np.round(y_pred)
    return y_pred

In [7]:
##################################################################################
##### Function to customize the DLNN architecture with parameters
##################################################################################

def DLNN_CORENup(input_seq_shape = (41, 21),
                 conv_filters_per_layer_1 = 50, kernel_length_1 = 5, conv_strides_1 = 1, ## 1st Convolutional layer parameters
                 max_pool_width_1 = 2, max_pool_stride_1 = 2, ## 1st Maxpool layer parameters
                 lstm_decode_units = 50, ## LSTM layer parameters
                 conv_filters_per_layer_2 = 50,  kernel_length_2 = 10, conv_strides_2 = 1, ## 2nd Convolutional layer parameters
                 max_pool_width_2 = 2, max_pool_stride_2 = 2, ## 2nd Maxpool layer parameters
                 dense_decode_units = 370, ## Dense layer parameters
                 prob = 0.5, learn_rate = 0.0003, loss = 'binary_crossentropy', metrics = None):
    
    beta = 0.001
    
    ######################################################################################################
    ########  SEQUENCE  ##################################################################################
    ######################################################################################################
    
    input1 = tf.keras.layers.Input(shape=input_seq_shape)

    x1 = tf.keras.layers.Conv1D(conv_filters_per_layer_1, kernel_length_1,
                                strides = conv_strides_1, 
                                kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                padding = "same")(input1)
    x1 = tf.keras.layers.Activation('relu')(x1)
    x1 = tf.keras.layers.MaxPool1D(pool_size = max_pool_width_1, strides = max_pool_stride_1)(x1)
    x1 = tf.keras.layers.Dropout(prob)(x1)

    ## LSTM Path

    x2 = tf.keras.layers.LSTM(lstm_decode_units, return_sequences = True, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta))(x1)
    x2 = tf.keras.layers.Dropout(prob)(x2)
    
    x2 = tf.keras.layers.Flatten()(x2)

    ## Conv Path

    x3 = tf.keras.layers.Conv1D(conv_filters_per_layer_2, kernel_length_2, 
                                strides = conv_strides_2, 
                                kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                padding = 'same')(x1)
    x3 = tf.keras.layers.Activation('relu')(x3)
    x3 = tf.keras.layers.MaxPooling1D(pool_size = max_pool_width_2, strides = max_pool_stride_2)(x3)
    x3 = tf.keras.layers.Dropout(prob)(x3)
    
    x3 = tf.keras.layers.Flatten()(x3)
    
    x4 = tf.keras.layers.Concatenate(1)([x2,x3])
    
    ######################################################################################################
    ########  Classifier  ################################################################################
    ######################################################################################################
    
    y = tf.keras.layers.Dense(dense_decode_units, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta), 
                              activation = 'relu')(x4)
    
    y = tf.keras.layers.Dropout(prob)(y)
    
    y = tf.keras.layers.Dense(1, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta), 
                              activation = 'sigmoid')(y)

    ## Generate Model from input and output
    model = tf.keras.models.Model(inputs=input1, outputs=y)
    
    ## Compile model
    if(metrics != None):
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
                      loss = loss, metrics = metrics)
    else:
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate), 
                      loss = loss)

    return model

In [8]:
DLNN_CORENup().summary()

NameError: name 'tf' is not defined

# Training

In [9]:
##################################################################################
##### read training file
##################################################################################
train_file_path = os.path.join(input_data_folder, training_data_file)
train_data = pd.read_csv(train_file_path, sep='\t', header=None)
train_data.columns = ['Sequence', 'name', 'id', 'flag', 'label_original', 'type']

##################################################################################
##### Create dictionary of all characters in the NT sequence 
##################################################################################
all_char_set = set({})
for val in [set(val) for val in train_data['Sequence']]:
    all_char_set = all_char_set.union(val)
all_char_list = list(all_char_set)
all_char_list.sort()
all_char_dict = {}
for i in range(len(all_char_list)):
    all_char_dict[all_char_list[i]] = i
    
##################################################################################
##### Create OHE of sequence
##################################################################################
train_data['OHE_Sequence'] = pd.Series([one_hot_encode_nt(val, all_char_dict) 
                                        for val in train_data["Sequence"]])

##################################################################################
##### Fix the labels
##################################################################################
train_data['label'] = pd.Series([1 if val == 1 else 0 
                                 for val in train_data["label_original"]])

##################################################################################
##### Extract features and labels, create folds
##################################################################################

features = np.array(list(train_data['OHE_Sequence']))
labels = np.array(list(train_data['label']))
labels = labels.reshape((labels.shape[0], 1))

input_seq_shape = features[0].shape

folds = build_kfold(features, labels, k=n_fold, shuffle=shuffle, seed=seed)

## Write the k-fold dataset to file
foldPath = os.path.join(outPath, expName, "{}fold".format(n_fold))
if(not os.path.isdir(foldPath)):
    os.makedirs(foldPath)
pickle.dump(folds, open(os.path.join(foldPath, foldName), "wb"))

In [30]:
half_seq_list = []
for val in train_data['Sequence']:
    half_seq_list.append(val[:20])
    half_seq_list.append(val[21:])

In [21]:
len(half_seq_list)

4764

In [22]:
len(list(dict.fromkeys(half_seq_list)))

4624

In [25]:
half_seq_list = []
for val, label in zip(train_data['Sequence'], train_data['label_original']):
    half_seq_list.append((val[:20], label))
    half_seq_list.append((val[21:], label))

In [26]:
len(half_seq_list)

4764

In [27]:
len(list(dict.fromkeys(half_seq_list)))

4652

In [32]:
half_seq_list = []
for val, label in zip(train_data['Sequence'], train_data['label_original']):
    half_seq_list.append((val[:21], label))
    half_seq_list.append((val[20:], label))

In [33]:
len(half_seq_list)

4764

In [34]:
len(list(dict.fromkeys(half_seq_list)))

4661

In [35]:
set([x for x in half_seq_list if half_seq_list.count(x) > 1])

{('-MCDAFVGTWKLVSSENFDDY', 1),
 ('ACFLDSMATLGLAAYGYGIRY', 1),
 ('AEKTKQGVAEAAGKTKEGVLY', 1),
 ('AIVGVWQERNAENAIEALKEY', -1),
 ('ALDFENEMATAASSSSLEKSY', 1),
 ('DEEDDSGKDKKKKTKKIKEKY', 1),
 ('EMIKSGMNVARLNFSHGTHEY', -1),
 ('FGGGTGSGFTSLLMERLSVDY', 1),
 ('GDYMNMSPVGDSNTSSPSECY', -1),
 ('GGSILASLSTFQQMWISKPEY', -1),
 ('GIALNDHFVKLISWYDNEFGY', 1),
 ('GRVTMRKTVAKPKGPSGSPWY', 1),
 ('GTAEVELKKGATLKITLDNAY', 1),
 ('IILNKGHDISADYWSLGILMY', 1),
 ('LDLAGRDLTDYLMKILTERGY', -1),
 ('LDLAGRDLTDYLMKILTERGY', 1),
 ('LNPDGSEKKDFYKDGKRLKNY', 1),
 ('NAVLDGADCIMLSGETAKGDY', -1),
 ('NIGHFNDPVHGGSWIRGAIYY', 1),
 ('NILWLDYKNICKVVEVGSKIY', -1),
 ('NYCYRCGNQAAIMELDDTLKY', 1),
 ('PRAPIIAVTRNPQTARQAHLY', -1),
 ('QKDSYVGDEAQSKRGILTLKY', -1),
 ('QLMKKEFTLEFSRDRKSMSVY', -1),
 ('QMSLLLRRPPGREAYPGDVFY', -1),
 ('RNAENAIEALKEYEPEMGKVY', -1),
 ('RSSASVSGSPSDGGFISSDEY', -1),
 ('SIVGRPRHQGVMVGMGQKDSY', -1),
 ('TEMMPAAYPPGGGSGGRLPGY', -1),
 ('TFDAGAGIALNDHFVKLISWY', 1),
 ('TPSQSSVVSIEEYTEMMPAAY', -1),
 ('VLADDNFSTIVAAVEEGRAI

In [37]:
half_seq_list = []
for val, label in zip(indpe_data['Sequence'], indpe_data['label_original']):
    half_seq_list.append((val[:21], label))
    half_seq_list.append((val[20:], label))

In [38]:
len(half_seq_list)

2450

In [39]:
len(list(dict.fromkeys(half_seq_list)))

2285

In [40]:
set([x for x in half_seq_list if half_seq_list.count(x) > 1])

{('AIVGVWQERNAENAIEALKEY', -1),
 ('ATICALCNDSALDYNEAKGVY', -1),
 ('DGSLVGDYGFDPFGLGKPAEY', -1),
 ('DPTGSYHGDSDLQLERINVYY', -1),
 ('EMEFTEAESNMNDLVSEYQQY', -1),
 ('ENIWLVGSICLSMSLHFLILY', -1),
 ('FKRISEQFTAMFRRKAFLHWY', -1),
 ('GGSILASLSTFQQMWISKPEY', -1),
 ('GKMGPGFTKALGHGVDLGHIY', -1),
 ('GMDEMEFTEAESNMNDLVSEY', -1),
 ('GQPLPFSISTLIWIEVLVIGY', -1),
 ('GTAEVELKKGATLKITLDNAY', -1),
 ('HDNPLRREEMHLEDSANFIKY', -1),
 ('IDNEALYDICFRTLKLTTPTY', -1),
 ('IDPTGSYHGDSDLQLERINVY', -1),
 ('IEHGIITNWDDMEKIWHHSFY', 1),
 ('IGAKFWEVISDEHGIDPTGSY', -1),
 ('ILIGETIKIVIEEYVQQLSGY', -1),
 ('IMNTFSVVPSPKVSDTVVEPY', -1),
 ('INIGHFNDPVHGGSWIRGAIY', -1),
 ('IVAAVEEGRAIYNNMKQFIRY', -1),
 ('KALGHGVDLGHIYGDNLERQY', -1),
 ('KANREKMTQIMFETFNVPAMY', -1),
 ('KITLDNAYMEKCDENILWLDY', -1),
 ('LGGGTGSGMGTLLISKIREEY', -1),
 ('MFETFNVPAMYVAIQAVLSLY', -1),
 ('MNKPPRNPKEPLISGWLFFRY', -1),
 ('NIGHFNDPVHGGSWIRGAIYY', -1),
 ('NILWLDYKNICKVVEVGSKIY', 1),
 ('NPKEPLISGWLFFRYLAIGCY', -1),
 ('PCCYYPCQHQGICVRFGLDRY', -1),
 ('PQSQMAV

In [10]:
##################################################################################
##### For each input file, train model and generate different outputs in a structured folder
##################################################################################

## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Train/Test model on all folds, generate evaluations
##################################################################################

## Create and set directory to save model
modelPath = os.path.join(outPath, expName, "{}fold".format(n_fold), "models")
if(not os.path.isdir(modelPath)):
    os.makedirs(modelPath)

i = -1
for fold in folds:
    i += 1
    
    print("\nTrain/Test model on Fold #"+str(i)+".")
    
    model = DLNN_CORENup(input_seq_shape = input_seq_shape)
    
    ## Define the model callbacks for early stopping and saving the model. Then train model
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    modelCallbacks = [
        tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                           monitor = 'val_loss', verbose = 1, save_best_only = True, 
                                           save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
    ]
    
    # adding random shuffling of the dataset for training purpose
    index_arr = np.arange(fold["X_train"].shape[0])
    index_arr = np.random.permutation(index_arr)
    
    model.fit(x = fold["X_train"][index_arr], y = fold["y_train"][index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
              callbacks = modelCallbacks, validation_data = (fold["X_test"], fold["y_test"]))
    
    model = tf.keras.models.load_model(current_model_path)
    
    ##################################################################################
    ##### Prediction and metrics for TRAIN dataset
    ##################################################################################

    y_pred = model.predict(fold["X_train"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_train"], label_pred)
    prec = precision_score(fold["y_train"],label_pred)
    mcc = matthews_corrcoef(fold["y_train"], label_pred)

    conf = confusion_matrix(fold["y_train"], label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(fold["y_train"], y_pred)
    auc = roc_auc_score(fold["y_train"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Train")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)
    
    ##################################################################################
    ##### Prediction and metrics for TEST dataset
    ##################################################################################

    y_pred = model.predict(fold["X_test"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_test"], label_pred)
    prec = precision_score(fold["y_test"],label_pred)
    mcc = matthews_corrcoef(fold["y_test"], label_pred)

    conf = confusion_matrix(fold["y_test"], label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(fold["y_test"], y_pred)
    auc = roc_auc_score(fold["y_test"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Test")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)


Train/Test model on Fold #0.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.35372, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.35372 to 1.26336, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.26336 to 1.16788, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.16788 to 1.06409, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.06409 to 1.01193, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.01193 to 0.97368, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 7/100
Epoch 7:

Epoch 58: val_loss did not improve from 0.58855
Epoch 59/100
Epoch 59: val_loss improved from 0.58855 to 0.58466, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 60/100
Epoch 60: val_loss did not improve from 0.58466
Epoch 61/100
Epoch 61: val_loss did not improve from 0.58466
Epoch 62/100
Epoch 62: val_loss did not improve from 0.58466
Epoch 63/100
Epoch 63: val_loss did not improve from 0.58466
Epoch 64/100
Epoch 64: val_loss did not improve from 0.58466
Epoch 65/100
Epoch 65: val_loss improved from 0.58466 to 0.58354, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 66/100
Epoch 66: val_loss did not improve from 0.58354
Epoch 67/100
Epoch 67: val_loss did not improve from 0.58354
Epoch 68/100
Epoch 68: val_loss did not improve from 0.58354
Epoch 69/100
Epoch 69: val_loss did not improve from 0.58354
Epoch 70/100
Epoch 70: val_loss did not improve from 0.58354
Epoch 

Epoch 96/100
Epoch 96: val_loss did not improve from 0.58354
Epoch 97/100
Epoch 97: val_loss did not improve from 0.58354
Epoch 98/100
Epoch 98: val_loss did not improve from 0.58354
Epoch 99/100
Epoch 99: val_loss did not improve from 0.58354
Epoch 100/100
Epoch 100: val_loss did not improve from 0.58354

Train/Test model on Fold #1.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.35359, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.35359 to 1.27117, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.27117 to 1.16762, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.16762 to 1.07759, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.07759 to 1.02858, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.02858 to 0.97949, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 7/100
Epoch 7:

Epoch 27/100
Epoch 27: val_loss improved from 0.66972 to 0.66918, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 28/100
Epoch 28: val_loss improved from 0.66918 to 0.66091, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 29/100
Epoch 29: val_loss improved from 0.66091 to 0.65478, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 30/100
Epoch 30: val_loss did not improve from 0.65478
Epoch 31/100
Epoch 31: val_loss improved from 0.65478 to 0.64693, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 32/100
Epoch 32: val_loss improved from 0.64693 to 0.64135, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 33/100
Epoch 33: val_loss did not improve from 0.64135
Epoch 34/100
Epoch 34: val_loss did no

Epoch 96: val_loss did not improve from 0.60348
Epoch 97/100
Epoch 97: val_loss did not improve from 0.60348
Epoch 98/100
Epoch 98: val_loss did not improve from 0.60348
Epoch 99/100
Epoch 99: val_loss did not improve from 0.60348
Epoch 100/100
Epoch 100: val_loss did not improve from 0.60348

Train/Test model on Fold #2.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.35066, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.35066 to 1.25712, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.25712 to 1.14898, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.14898 to 1.05440, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.05440 to 1.00320, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.00320 to 0.95284, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 7/100
Epoch 7:

Epoch 28/100
Epoch 28: val_loss improved from 0.65273 to 0.64957, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 29/100
Epoch 29: val_loss did not improve from 0.64957
Epoch 30/100
Epoch 30: val_loss did not improve from 0.64957
Epoch 31/100
Epoch 31: val_loss improved from 0.64957 to 0.63919, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 32/100
Epoch 32: val_loss improved from 0.63919 to 0.63583, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 33/100
Epoch 33: val_loss improved from 0.63583 to 0.63483, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 34/100
Epoch 34: val_loss improved from 0.63483 to 0.63434, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 35/100
Epoch 35: val_loss improv

Epoch 59/100
Epoch 59: val_loss improved from 0.60312 to 0.60177, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 60/100
Epoch 60: val_loss did not improve from 0.60177
Epoch 61/100
Epoch 61: val_loss did not improve from 0.60177
Epoch 62/100
Epoch 62: val_loss did not improve from 0.60177
Epoch 63/100
Epoch 63: val_loss did not improve from 0.60177
Epoch 64/100
Epoch 64: val_loss did not improve from 0.60177
Epoch 65/100
Epoch 65: val_loss did not improve from 0.60177
Epoch 66/100
Epoch 66: val_loss did not improve from 0.60177
Epoch 67/100
Epoch 67: val_loss did not improve from 0.60177
Epoch 68/100
Epoch 68: val_loss did not improve from 0.60177
Epoch 69/100
Epoch 69: val_loss did not improve from 0.60177
Epoch 70/100
Epoch 70: val_loss did not improve from 0.60177
Epoch 71/100
Epoch 71: val_loss did not improve from 0.60177
Epoch 72/100
Epoch 72: val_loss did not improve from 0.60177
Epoch 73/100
Epoch 73: val_loss did not

Epoch 97/100
Epoch 97: val_loss did not improve from 0.60177
Epoch 98/100
Epoch 98: val_loss did not improve from 0.60177
Epoch 99/100
Epoch 99: val_loss did not improve from 0.60177
Epoch 100/100
Epoch 100: val_loss did not improve from 0.60177

Train/Test model on Fold #3.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.35251, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.35251 to 1.25605, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.25605 to 1.14280, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.14280 to 1.08230, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.08230 to 1.01895, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.01895 to 0.98678, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 7/100
Epoch 7:

Epoch 28/100
Epoch 28: val_loss did not improve from 0.67749
Epoch 29/100
Epoch 29: val_loss improved from 0.67749 to 0.66845, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 30/100
Epoch 30: val_loss improved from 0.66845 to 0.66784, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 31/100
Epoch 31: val_loss improved from 0.66784 to 0.66105, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 32/100
Epoch 32: val_loss improved from 0.66105 to 0.65859, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 33/100
Epoch 33: val_loss did not improve from 0.65859
Epoch 34/100
Epoch 34: val_loss did not improve from 0.65859
Epoch 35/100
Epoch 35: val_loss improved from 0.65859 to 0.65093, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\model

Epoch 60/100
Epoch 60: val_loss did not improve from 0.63081
Epoch 61/100
Epoch 61: val_loss did not improve from 0.63081
Epoch 62/100
Epoch 62: val_loss did not improve from 0.63081
Epoch 63/100
Epoch 63: val_loss improved from 0.63081 to 0.63063, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 64/100
Epoch 64: val_loss did not improve from 0.63063
Epoch 65/100
Epoch 65: val_loss did not improve from 0.63063
Epoch 66/100
Epoch 66: val_loss did not improve from 0.63063
Epoch 67/100
Epoch 67: val_loss did not improve from 0.63063
Epoch 68/100
Epoch 68: val_loss did not improve from 0.63063
Epoch 69/100
Epoch 69: val_loss did not improve from 0.63063
Epoch 70/100
Epoch 70: val_loss did not improve from 0.63063
Epoch 71/100
Epoch 71: val_loss did not improve from 0.63063
Epoch 72/100
Epoch 72: val_loss did not improve from 0.63063
Epoch 73/100
Epoch 73: val_loss did not improve from 0.63063
Epoch 74/100
Epoch 74: val_loss did not

  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.35383, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.35383 to 1.26453, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.26453 to 1.16240, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.16240 to 1.06209, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.06209 to 1.01679, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.01679 to 0.97213, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 7/100
Epoch 7:

Epoch 28/100
Epoch 28: val_loss improved from 0.69683 to 0.69048, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 29/100
Epoch 29: val_loss did not improve from 0.69048
Epoch 30/100
Epoch 30: val_loss did not improve from 0.69048
Epoch 31/100
Epoch 31: val_loss improved from 0.69048 to 0.67780, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 32/100
Epoch 32: val_loss improved from 0.67780 to 0.67505, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 33/100
Epoch 33: val_loss improved from 0.67505 to 0.67160, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 34/100
Epoch 34: val_loss did not improve from 0.67160
Epoch 35/100
Epoch 35: val_loss did not improve from 0.67160
Epoch 36/100
Epoch 36: val_loss improved from 0.67160 to 0.66743, saving model to Re

Epoch 60/100
Epoch 60: val_loss did not improve from 0.63815
Epoch 61/100
Epoch 61: val_loss did not improve from 0.63815
Epoch 62/100
Epoch 62: val_loss did not improve from 0.63815
Epoch 63/100
Epoch 63: val_loss did not improve from 0.63815
Epoch 64/100
Epoch 64: val_loss did not improve from 0.63815
Epoch 65/100
Epoch 65: val_loss did not improve from 0.63815
Epoch 66/100
Epoch 66: val_loss did not improve from 0.63815
Epoch 67/100
Epoch 67: val_loss did not improve from 0.63815
Epoch 68/100
Epoch 68: val_loss did not improve from 0.63815
Epoch 69/100
Epoch 69: val_loss did not improve from 0.63815
Epoch 70/100
Epoch 70: val_loss did not improve from 0.63815
Epoch 71/100
Epoch 71: val_loss did not improve from 0.63815
Epoch 72/100
Epoch 72: val_loss did not improve from 0.63815
Epoch 73/100
Epoch 73: val_loss did not improve from 0.63815
Epoch 74/100
Epoch 74: val_loss did not improve from 0.63815
Epoch 75/100
Epoch 75: val_loss did not improve from 0.63815
Epoch 76/100
Epoch 76: v

## k-fold Training evaluation

In [11]:
evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,0.759434,0.759065,0.84012,0.761556,0.757322,0.519347
Train,0.942906,0.941296,0.983703,0.945213,0.940597,0.886248


In [12]:
evaluations_df[evaluations_df["Train_Test"] == "Test"]

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
1,0,Test,0.775681,0.792035,"[0.0, 0.0041841004184100415, 0.104602510460251...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9960138, 0.99601376, 0.9573596, 0.9553007, ...",0.856194,0.748954,0.802521,0.552233
3,1,Test,0.771488,0.779221,"[0.0, 0.004201680672268907, 0.0756302521008403...","[0.0, 0.0, 0.0, 0.0041841004184100415, 0.00418...","[1.995278, 0.995278, 0.97472954, 0.97269803, 0...",0.840494,0.756303,0.786611,0.543181
5,2,Test,0.754202,0.746939,"[0.0, 0.004201680672268907, 0.1302521008403361...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9920356, 0.9920357, 0.95690477, 0.95638555,...",0.845209,0.768908,0.739496,0.508623
7,3,Test,0.766807,0.750988,"[0.0, 0.004201680672268907, 0.0504201680672268...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9928411, 0.9928411, 0.97938436, 0.97892886,...",0.838897,0.798319,0.735294,0.534676
9,4,Test,0.728992,0.726141,"[0.0, 0.004201680672268907, 0.0882352941176470...","[0.0, 0.0, 0.0, 0.004201680672268907, 0.004201...","[1.9935296, 0.9935295, 0.9597013, 0.9574193, 0...",0.819804,0.735294,0.722689,0.45802


# Independent data

In [13]:
train_features = features
train_labels = labels

In [36]:
##################################################################################
##### read independent data file
##################################################################################
indpe_file_path = os.path.join(input_data_folder, independent_data_file)
indpe_data = pd.read_csv(indpe_file_path, sep='\t', header=None)
indpe_data.columns = ['Sequence', 'name', 'id', 'flag', 'label_original', 'type']
indpe_data.head()
    
##################################################################################
##### Create OHE of sequence
##################################################################################
indpe_data['OHE_Sequence'] = pd.Series([one_hot_encode_nt(val, all_char_dict) 
                                        for val in indpe_data["Sequence"]])

##################################################################################
##### Fix the labels
##################################################################################
indpe_data['label'] = pd.Series([1 if val == 1 else 0 
                                 for val in indpe_data["label_original"]])

##################################################################################
##### Extract features and labels, create folds
##################################################################################

indpe_features = np.array(list(indpe_data['OHE_Sequence']))
indpe_labels = np.array(list(indpe_data['label']))
indpe_labels = indpe_labels.reshape((indpe_labels.shape[0], 1))

input_seq_shape = indpe_features[0].shape

## Using k-fold Models

### Performance of each k-fold model

In [15]:
## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    label_pred = pred2label(y_pred)

    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(indpe_labels, label_pred)
    prec = precision_score(indpe_labels,label_pred)
    mcc = matthews_corrcoef(indpe_labels, label_pred)

    conf = confusion_matrix(indpe_labels, label_pred)
    tn, fp, fn, tp = conf.ravel()
    sens = tp/(tp+fn)
    spec = tn/(tn+fp)
    
    fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
    auc = roc_auc_score(indpe_labels, y_pred)

    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Independent")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.626776,0.246891,0.667723,0.608867,0.630333,0.181104


In [16]:
evaluations_df

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,0,Independent,0.653061,0.262821,"[0.0, 0.0049261083743842365, 0.004926108374384...","[0.0, 0.0, 0.0019569471624266144, 0.0019569471...","[1.9960555, 0.9960555, 0.9889481, 0.9862916, 0...",0.675735,0.605911,0.662427,0.205345
1,1,Independent,0.63102,0.245399,"[0.0, 0.0049261083743842365, 0.004926108374384...","[0.0, 0.0, 0.0009784735812133072, 0.0009784735...","[1.9972394, 0.99723935, 0.995284, 0.99477816, ...",0.665998,0.591133,0.638943,0.174684
2,2,Independent,0.614694,0.237817,"[0.0, 0.0049261083743842365, 0.004926108374384...","[0.0, 0.0, 0.0019569471624266144, 0.0019569471...","[1.9965749, 0.9965749, 0.99159884, 0.9882587, ...",0.665299,0.600985,0.617417,0.164601
3,3,Independent,0.604082,0.237918,"[0.0, 0.0049261083743842365, 0.004926108374384...","[0.0, 0.0, 0.0009784735812133072, 0.0009784735...","[1.9974382, 0.9974382, 0.9929658, 0.99229115, ...",0.657168,0.630542,0.598826,0.171845
4,4,Independent,0.63102,0.250501,"[0.0, 0.0, 0.009852216748768473, 0.00985221674...","[0.0, 0.0009784735812133072, 0.000978473581213...","[1.9954288, 0.9954288, 0.9944258, 0.99050903, ...",0.674414,0.615764,0.634051,0.189048


### Mean score with k-fold models

In [17]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    total_pred += y_pred
    all_preds.append(y_pred)
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.639184,0.250522,0.676178,0.591133,0.648728,0.182766


### Voting score with k-fold models

In [18]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    vote_pred = pred2label(y_pred)
    total_pred += vote_pred
    all_preds.append(vote_pred)
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.632653,0.246407,0.661496,0.591133,0.6409,0.176291


## Using New Model

Train model on full data from training. Predict and evaluate on Independent data.

In [19]:
model = DLNN_CORENup(input_seq_shape = input_seq_shape)
    
## Define the model callbacks for early stopping and saving the model. Then train model
current_model_path = os.path.join(modelPath, "_fullModel.hdf5")
modelCallbacks = [
    tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                       monitor = 'val_loss', verbose = 1, save_best_only = True, 
                                       save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
]

# adding random shuffling of the dataset for training purpose
index_arr = np.arange(train_features.shape[0])
index_arr = np.random.permutation(index_arr)

model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
          callbacks = modelCallbacks, validation_data = (indpe_features, indpe_labels))
# model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
#           callbacks = modelCallbacks, validation_split = 0.2)

model = tf.keras.models.load_model(current_model_path)

Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.34049, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.34049 to 1.27402, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.27402 to 1.26119, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.26119 to 1.08604, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 5/100
Epoch 5: val_loss did not improve from 1.08604
Epoch 6/100
Epoch 6: val_loss did not improve from 1.08604
Epoch 7/100
Epoch 7: val_loss did not improve from 1.08604
Epoch 8/100
Epoch 8: val_loss improved from 1.08604 to 1.01971, saving model to Results\NT_Site_PredNTS_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 9/100
Epoch 9: val_loss did 

Epoch 73/100
Epoch 73: val_loss did not improve from 0.75899
Epoch 74/100
Epoch 74: val_loss did not improve from 0.75899
Epoch 75/100
Epoch 75: val_loss did not improve from 0.75899
Epoch 76/100
Epoch 76: val_loss did not improve from 0.75899
Epoch 77/100
Epoch 77: val_loss did not improve from 0.75899
Epoch 78/100
Epoch 78: val_loss did not improve from 0.75899
Epoch 79/100
Epoch 79: val_loss did not improve from 0.75899
Epoch 80/100
Epoch 80: val_loss did not improve from 0.75899
Epoch 81/100
Epoch 81: val_loss did not improve from 0.75899
Epoch 82/100
Epoch 82: val_loss did not improve from 0.75899
Epoch 83/100
Epoch 83: val_loss did not improve from 0.75899
Epoch 84/100
Epoch 84: val_loss did not improve from 0.75899
Epoch 85/100
Epoch 85: val_loss did not improve from 0.75899
Epoch 86/100
Epoch 86: val_loss did not improve from 0.75899
Epoch 87/100
Epoch 87: val_loss did not improve from 0.75899
Epoch 88/100
Epoch 88: val_loss did not improve from 0.75899
Epoch 89/100
Epoch 89: v

In [20]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

y_pred = model.predict(indpe_features)
label_pred = pred2label(y_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)
mcc = matthews_corrcoef(indpe_labels, label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
tn, fp, fn, tp = conf.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)

fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
auc = roc_auc_score(indpe_labels, y_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.67102,0.256098,0.666437,0.517241,0.701566,0.172411
