In [1]:
##################################################################################
##### Define all parameters for model tuning
##################################################################################

n_fold = 5
expName = "NT_Site_iNitroY_Classification_DLNN_CORENup"
outPath = "Results"
foldName = "folds.pickle"

epochs = 100
batch_size = 16
shuffle = True
seed = None

input_data_folder = "Data\\iNitroY-Deep-Dataset"
pos_data_file = "raw-nitrotyrosine-pos.fasta"
neg_data_file = "cdhit70-nitrotyr-neg.fasta"

In [2]:
import os 
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, confusion_matrix
from sklearn.metrics import roc_auc_score

import math

from Bio import SeqIO

In [3]:
# print(tf.test.is_gpu_available(cuda_only=True))
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
##################################################################################
##### define all CUSTOM functions
##################################################################################

def one_hot_encode_nt(sequence, char_dict):
    
    seq_encoded = np.zeros((len(sequence),len(char_dict)))
    
    i = 0
    for single_character in sequence:
        if(single_character.upper() in char_dict.keys()):
            seq_encoded[i][char_dict[single_character.upper()]] = 1
            i = i+1
        else:
            raise ValueError('Incorrect character in NT sequence: '+sequence)
    return seq_encoded

In [5]:
##################################################################################
##### Build k-fold functions
##################################################################################

## Build the K-fold from dataset
def build_kfold(features, labels, k=10, shuffle=False, seed=None):
    
    skf = StratifiedKFold(n_splits=k, shuffle=shuffle, random_state=seed)
    kfoldList = []
    for train_index, test_index in skf.split(features, labels):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        kfoldList.append({
            "X_train": X_train,
            "X_test": X_test,
            "y_train":y_train,
            "y_test":y_test
        })
    return kfoldList

In [6]:
##################################################################################
##### define evaluator functions
##################################################################################

def pred2label(y_pred):
    y_pred = np.round(y_pred)
    return y_pred

In [7]:
##################################################################################
##### Function to customize the DLNN architecture with parameters
##################################################################################

def DLNN_CORENup(input_seq_shape = (41, 21),
                 conv_filters_per_layer_1 = 50, kernel_length_1 = 5, conv_strides_1 = 1, ## 1st Convolutional layer parameters
                 max_pool_width_1 = 2, max_pool_stride_1 = 2, ## 1st Maxpool layer parameters
                 lstm_decode_units = 50, ## LSTM layer parameters
                 conv_filters_per_layer_2 = 50,  kernel_length_2 = 10, conv_strides_2 = 1, ## 2nd Convolutional layer parameters
                 max_pool_width_2 = 2, max_pool_stride_2 = 2, ## 2nd Maxpool layer parameters
                 dense_decode_units = 370, ## Dense layer parameters
                 prob = 0.5, learn_rate = 0.0003, loss = 'binary_crossentropy', metrics = None):
    
    beta = 0.001
    
    ######################################################################################################
    ########  SEQUENCE  ##################################################################################
    ######################################################################################################
    
    input1 = tf.keras.layers.Input(shape=input_seq_shape)

    x1 = tf.keras.layers.Conv1D(conv_filters_per_layer_1, kernel_length_1,
                                strides = conv_strides_1, 
                                kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                padding = "same")(input1)
    x1 = tf.keras.layers.Activation('relu')(x1)
    x1 = tf.keras.layers.MaxPool1D(pool_size = max_pool_width_1, strides = max_pool_stride_1)(x1)
    x1 = tf.keras.layers.Dropout(prob)(x1)

    ## LSTM Path

    x2 = tf.keras.layers.LSTM(lstm_decode_units, return_sequences = True, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta))(x1)
    x2 = tf.keras.layers.Dropout(prob)(x2)
    
    x2 = tf.keras.layers.Flatten()(x2)

    ## Conv Path

    x3 = tf.keras.layers.Conv1D(conv_filters_per_layer_2, kernel_length_2, 
                                strides = conv_strides_2, 
                                kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                padding = 'same')(x1)
    x3 = tf.keras.layers.Activation('relu')(x3)
    x3 = tf.keras.layers.MaxPooling1D(pool_size = max_pool_width_2, strides = max_pool_stride_2)(x3)
    x3 = tf.keras.layers.Dropout(prob)(x3)
    
    x3 = tf.keras.layers.Flatten()(x3)
    
    x4 = tf.keras.layers.Concatenate(1)([x2,x3])
    
    ######################################################################################################
    ########  Classifier  ################################################################################
    ######################################################################################################
    
    y = tf.keras.layers.Dense(dense_decode_units, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta), 
                              activation = 'relu')(x4)
    
    y = tf.keras.layers.Dropout(prob)(y)
    
    y = tf.keras.layers.Dense(1, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta), 
                              activation = 'sigmoid')(y)

    ## Generate Model from input and output
    model = tf.keras.models.Model(inputs=input1, outputs=y)
    
    ## Compile model
    if(metrics != None):
        model.compile(optimizer = tf.keras.optimizers.Adam(lr=learn_rate), 
                      loss = loss, metrics = metrics)
    else:
        model.compile(optimizer = tf.keras.optimizers.Adam(lr=learn_rate), 
                      loss = loss)

    return model

In [8]:
# for step in range(10):
#     initial_learning_rate=1e-1
#     decay_steps=10000
#     decay_rate=0.9
#     print(step, ':', initial_learning_rate * decay_rate ** (step / decay_steps))

In [9]:
DLNN_CORENup().summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 41, 21)]     0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 41, 50)       5300        ['input_1[0][0]']                
                                                                                                  
 activation (Activation)        (None, 41, 50)       0           ['conv1d[0][0]']                 
                                                                                                  
 max_pooling1d (MaxPooling1D)   (None, 20, 50)       0           ['activation[0][0]']             
                                                                                              

  super(Adam, self).__init__(name, **kwargs)


# Prepare Dataset

In [10]:
def read_fasta_file(file_path):
    
    openFile = open(file_path)
    fastaSequences = SeqIO.parse(openFile, "fasta")

    name_list = []
    seq_list = []

    for fasta in fastaSequences: 
        name_list.append(fasta.id)
        seq_list.append(str(fasta.seq))

    openFile.close()
    
    return name_list, seq_list

In [11]:
##################################################################################
##### read positive and negative files
##################################################################################

pos_file_path = os.path.join(input_data_folder, pos_data_file)
_, pos_seq_list = read_fasta_file(pos_file_path)

neg_file_path = os.path.join(input_data_folder, neg_data_file)
_, neg_seq_list = read_fasta_file(neg_file_path)

pos_seq_list = [val.replace('X', '-') for val in pos_seq_list]
neg_seq_list = [val.replace('X', '-') for val in neg_seq_list]

# remove duplicates in data
pos_seq_list = list(set(pos_seq_list))
neg_seq_list = list(set(neg_seq_list))

all_seq_list = pos_seq_list + neg_seq_list

all_seq_label_list = ([1] * len(pos_seq_list)) + ([0] * len(neg_seq_list))

##################################################################################
##### Create dictionary of all characters in the NT sequence 
##################################################################################
all_char_set = set({})
for val in [set(val) for val in all_seq_list]:
    all_char_set = all_char_set.union(val)
all_char_list = list(all_char_set)
all_char_list.sort()
all_char_dict = {}
for i in range(len(all_char_list)):
    all_char_dict[all_char_list[i]] = i
    
##################################################################################
##### Create OHE of all sequences
##################################################################################
all_seq_OHE_list = [one_hot_encode_nt(val, all_char_dict)
                    for val in all_seq_list]

##################################################################################
##### Create numpy array of features and sequences
##################################################################################

## create the features and labels datasets for the training
features = np.array(all_seq_OHE_list)
labels = np.array(all_seq_label_list)
labels = labels.reshape((labels.shape[0], 1))

##################################################################################
##### Divide into Train/Independent datasets
##################################################################################

train_features, indpe_features, train_labels, indpe_labels = train_test_split(features, labels, 
                                                                              stratify=labels, test_size=0.3, 
                                                                              random_state=seed, shuffle=shuffle)

##################################################################################
##### Generate Folds from training dataset, and store to file
##################################################################################

## Generate the k-fold dataset
folds = build_kfold(train_features, train_labels, k=n_fold, shuffle=shuffle, seed=seed)

## Write the k-fold dataset to file
foldPath = os.path.join(outPath, expName, "{}fold".format(n_fold))
if(not os.path.isdir(foldPath)):
    os.makedirs(foldPath)
pickle.dump(folds, open(os.path.join(foldPath, foldName), "wb"))

## Write the independent test dataset to file
pickle.dump([indpe_features, indpe_labels], open(os.path.join(foldPath, 'independent_dataset.pickle'), "wb"))

##################################################################################

input_seq_shape = features[0].shape

In [12]:
indpe_features.shape

(202, 41, 21)

In [13]:
np.sum(indpe_labels)

50

# Training

In [14]:
##################################################################################
##### For each input file, train model and generate different outputs in a structured folder
##################################################################################

## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Train/Test model on all folds, generate evaluations
##################################################################################

## Create and set directory to save model
modelPath = os.path.join(outPath, expName, "{}fold".format(n_fold), "models")
if(not os.path.isdir(modelPath)):
    os.makedirs(modelPath)

i = -1
for fold in folds:
    i += 1
    
    print("\nTrain/Test model on Fold #"+str(i)+".")
    
    model = DLNN_CORENup(input_seq_shape = input_seq_shape)
    
    ## Define the model callbacks for early stopping and saving the model. Then train model
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    modelCallbacks = [
        tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                           monitor = 'val_loss', verbose = 1, save_best_only = True, 
                                           save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
    ]
    
    # adding random shuffling of the dataset for training purpose
    index_arr = np.arange(fold["X_train"].shape[0])
    index_arr = np.random.permutation(index_arr)
    
    model.fit(x = fold["X_train"][index_arr], y = fold["y_train"][index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
              callbacks = modelCallbacks, validation_data = (fold["X_test"], fold["y_test"]))
    
    model = tf.keras.models.load_model(current_model_path)
    
    ##################################################################################
    ##### Prediction and metrics for TRAIN dataset
    ##################################################################################

    y_pred = model.predict(fold["X_train"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_train"], label_pred)
    prec = precision_score(fold["y_train"],label_pred)

    conf = confusion_matrix(fold["y_train"], label_pred)
    if(conf[0][0]+conf[1][0]):
        sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
    else:
        sens = 0.0
    if(conf[1][1]+conf[0][1]):
        spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
    else:
        spec = 0.0
    if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
        mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
    else:
        mcc= 0.0
    fpr, tpr, thresholds = roc_curve(fold["y_train"], y_pred)
    auc = roc_auc_score(fold["y_train"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Train")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)
    
    ##################################################################################
    ##### Prediction and metrics for TEST dataset
    ##################################################################################

    y_pred = model.predict(fold["X_test"])
    label_pred = pred2label(y_pred)
    
    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(fold["y_test"], label_pred)
    prec = precision_score(fold["y_test"],label_pred)

    conf = confusion_matrix(fold["y_test"], label_pred)
    if(conf[0][0]+conf[1][0]):
        sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
    else:
        sens = 0.0
    if(conf[1][1]+conf[0][1]):
        spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
    else:
        spec = 0.0
    if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
        mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
    else:
        mcc= 0.0
    fpr, tpr, thresholds = roc_curve(fold["y_test"], y_pred)
    auc = roc_auc_score(fold["y_test"], y_pred)
    
    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Test")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)


Train/Test model on Fold #0.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.26914, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.26914 to 1.21524, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.21524 to 1.16941, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.16941 to 1.13064, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.13064 to 1.08837, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.08837 to 1.04938, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 7/100
Epoch 7:

Epoch 28: val_loss improved from 0.64428 to 0.61516, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 29/100
Epoch 29: val_loss improved from 0.61516 to 0.60604, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 30/100
Epoch 30: val_loss improved from 0.60604 to 0.59531, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 31/100
Epoch 31: val_loss did not improve from 0.59531
Epoch 32/100
Epoch 32: val_loss did not improve from 0.59531
Epoch 33/100
Epoch 33: val_loss improved from 0.59531 to 0.58991, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 34/100
Epoch 34: val_loss improved from 0.58991 to 0.58809, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 35/100
Epoch 35: val_loss did not improve fro

Epoch 63: val_loss did not improve from 0.54655
Epoch 64/100
Epoch 64: val_loss improved from 0.54655 to 0.54398, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold0.hdf5
Epoch 65/100
Epoch 65: val_loss did not improve from 0.54398
Epoch 66/100
Epoch 66: val_loss did not improve from 0.54398
Epoch 67/100
Epoch 67: val_loss did not improve from 0.54398
Epoch 68/100
Epoch 68: val_loss did not improve from 0.54398
Epoch 69/100
Epoch 69: val_loss did not improve from 0.54398
Epoch 70/100
Epoch 70: val_loss did not improve from 0.54398
Epoch 71/100
Epoch 71: val_loss did not improve from 0.54398
Epoch 72/100
Epoch 72: val_loss did not improve from 0.54398
Epoch 73/100
Epoch 73: val_loss did not improve from 0.54398
Epoch 74/100
Epoch 74: val_loss did not improve from 0.54398
Epoch 75/100
Epoch 75: val_loss did not improve from 0.54398
Epoch 76/100
Epoch 76: val_loss did not improve from 0.54398
Epoch 77/100
Epoch 77: val_loss did not improve from

Epoch 100: val_loss did not improve from 0.51956

Train/Test model on Fold #1.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.25430, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.25430 to 1.21053, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.21053 to 1.15888, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.15888 to 1.12876, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.12876 to 1.06747, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.06747 to 1.02936, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 7/100
Epoch 7:

Epoch 28/100
Epoch 28: val_loss improved from 0.47616 to 0.45196, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 29/100
Epoch 29: val_loss improved from 0.45196 to 0.44466, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 30/100
Epoch 30: val_loss did not improve from 0.44466
Epoch 31/100
Epoch 31: val_loss improved from 0.44466 to 0.42578, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 32/100
Epoch 32: val_loss did not improve from 0.42578
Epoch 33/100
Epoch 33: val_loss did not improve from 0.42578
Epoch 34/100
Epoch 34: val_loss improved from 0.42578 to 0.39780, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 35/100
Epoch 35: val_loss improved from 0.39780 to 0.38844, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\model

Epoch 59/100
Epoch 59: val_loss improved from 0.30682 to 0.30226, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 60/100
Epoch 60: val_loss did not improve from 0.30226
Epoch 61/100
Epoch 61: val_loss did not improve from 0.30226
Epoch 62/100
Epoch 62: val_loss did not improve from 0.30226
Epoch 63/100
Epoch 63: val_loss improved from 0.30226 to 0.27676, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 64/100
Epoch 64: val_loss did not improve from 0.27676
Epoch 65/100
Epoch 65: val_loss did not improve from 0.27676
Epoch 66/100
Epoch 66: val_loss did not improve from 0.27676
Epoch 67/100
Epoch 67: val_loss did not improve from 0.27676
Epoch 68/100
Epoch 68: val_loss did not improve from 0.27676
Epoch 69/100
Epoch 69: val_loss improved from 0.27676 to 0.26335, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold1.hdf5
Epoch 70

Epoch 94: val_loss did not improve from 0.22742
Epoch 95/100
Epoch 95: val_loss did not improve from 0.22742
Epoch 96/100
Epoch 96: val_loss did not improve from 0.22742
Epoch 97/100
Epoch 97: val_loss did not improve from 0.22742
Epoch 98/100
Epoch 98: val_loss did not improve from 0.22742
Epoch 99/100
Epoch 99: val_loss did not improve from 0.22742
Epoch 100/100
Epoch 100: val_loss did not improve from 0.22742

Train/Test model on Fold #2.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.26854, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.26854 to 1.23458, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.23458 to 1.17998, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.17998 to 1.14774, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.14774 to 1.10363, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.10363 to 1.07876, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 7/100
Epoch 7:

Epoch 29/100
Epoch 29: val_loss improved from 0.66935 to 0.64272, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 30/100
Epoch 30: val_loss did not improve from 0.64272
Epoch 31/100
Epoch 31: val_loss improved from 0.64272 to 0.63878, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 32/100
Epoch 32: val_loss improved from 0.63878 to 0.62101, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 33/100
Epoch 33: val_loss did not improve from 0.62101
Epoch 34/100
Epoch 34: val_loss did not improve from 0.62101
Epoch 35/100
Epoch 35: val_loss did not improve from 0.62101
Epoch 36/100
Epoch 36: val_loss improved from 0.62101 to 0.59227, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 37/100
Epoch 37: val_loss improved from 0.59227 to 0.54483, saving model to Re

Epoch 62: val_loss did not improve from 0.47410
Epoch 63/100
Epoch 63: val_loss did not improve from 0.47410
Epoch 64/100
Epoch 64: val_loss did not improve from 0.47410
Epoch 65/100
Epoch 65: val_loss did not improve from 0.47410
Epoch 66/100
Epoch 66: val_loss did not improve from 0.47410
Epoch 67/100
Epoch 67: val_loss improved from 0.47410 to 0.47016, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 68/100
Epoch 68: val_loss improved from 0.47016 to 0.45403, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold2.hdf5
Epoch 69/100
Epoch 69: val_loss did not improve from 0.45403
Epoch 70/100
Epoch 70: val_loss did not improve from 0.45403
Epoch 71/100
Epoch 71: val_loss did not improve from 0.45403
Epoch 72/100
Epoch 72: val_loss did not improve from 0.45403
Epoch 73/100
Epoch 73: val_loss did not improve from 0.45403
Epoch 74/100
Epoch 74: val_loss did not improve from 0.45403
Epoch 

Epoch 99/100
Epoch 99: val_loss did not improve from 0.43029
Epoch 100/100
Epoch 100: val_loss did not improve from 0.43029

Train/Test model on Fold #3.
Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.26464, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.26464 to 1.21792, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.21792 to 1.17404, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.17404 to 1.14274, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.14274 to 1.10459, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.10459 to 1.07559, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 7/100
Epoch 7:

Epoch 32: val_loss did not improve from 0.90443
Epoch 33/100
Epoch 33: val_loss did not improve from 0.90443
Epoch 34/100
Epoch 34: val_loss did not improve from 0.90443
Epoch 35/100
Epoch 35: val_loss did not improve from 0.90443
Epoch 36/100
Epoch 36: val_loss did not improve from 0.90443
Epoch 37/100
Epoch 37: val_loss did not improve from 0.90443
Epoch 38/100
Epoch 38: val_loss did not improve from 0.90443
Epoch 39/100
Epoch 39: val_loss did not improve from 0.90443
Epoch 40/100
Epoch 40: val_loss did not improve from 0.90443
Epoch 41/100
Epoch 41: val_loss did not improve from 0.90443
Epoch 42/100
Epoch 42: val_loss did not improve from 0.90443
Epoch 43/100
Epoch 43: val_loss did not improve from 0.90443
Epoch 44/100
Epoch 44: val_loss did not improve from 0.90443
Epoch 45/100
Epoch 45: val_loss did not improve from 0.90443
Epoch 46/100
Epoch 46: val_loss did not improve from 0.90443
Epoch 47/100
Epoch 47: val_loss did not improve from 0.90443
Epoch 48/100
Epoch 48: val_loss did n

Epoch 71/100
Epoch 71: val_loss did not improve from 0.90443
Epoch 72/100
Epoch 72: val_loss did not improve from 0.90443
Epoch 73/100
Epoch 73: val_loss improved from 0.90443 to 0.90310, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold3.hdf5
Epoch 74/100
Epoch 74: val_loss did not improve from 0.90310
Epoch 75/100
Epoch 75: val_loss did not improve from 0.90310
Epoch 76/100
Epoch 76: val_loss did not improve from 0.90310
Epoch 77/100
Epoch 77: val_loss did not improve from 0.90310
Epoch 78/100
Epoch 78: val_loss did not improve from 0.90310
Epoch 79/100
Epoch 79: val_loss did not improve from 0.90310
Epoch 80/100
Epoch 80: val_loss did not improve from 0.90310
Epoch 81/100
Epoch 81: val_loss did not improve from 0.90310
Epoch 82/100
Epoch 82: val_loss did not improve from 0.90310
Epoch 83/100
Epoch 83: val_loss did not improve from 0.90310
Epoch 84/100
Epoch 84: val_loss did not improve from 0.90310
Epoch 85/100
Epoch 85: val_loss did not

  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.25792, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.25792 to 1.20627, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.20627 to 1.15740, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.15740 to 1.10342, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.10342 to 1.07366, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.07366 to 1.02735, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 7/100
Epoch 7:

Epoch 29/100
Epoch 29: val_loss did not improve from 0.47833
Epoch 30/100
Epoch 30: val_loss did not improve from 0.47833
Epoch 31/100
Epoch 31: val_loss improved from 0.47833 to 0.45652, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 32/100
Epoch 32: val_loss did not improve from 0.45652
Epoch 33/100
Epoch 33: val_loss improved from 0.45652 to 0.44119, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 34/100
Epoch 34: val_loss improved from 0.44119 to 0.42325, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 35/100
Epoch 35: val_loss did not improve from 0.42325
Epoch 36/100
Epoch 36: val_loss improved from 0.42325 to 0.41782, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 37/100
Epoch 37: val_loss did not improve from 0.41782
Epoch 38/100
Epoch 38: 

Epoch 64/100
Epoch 64: val_loss did not improve from 0.34097
Epoch 65/100
Epoch 65: val_loss improved from 0.34097 to 0.33974, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 66/100
Epoch 66: val_loss improved from 0.33974 to 0.32761, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 67/100
Epoch 67: val_loss improved from 0.32761 to 0.32334, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 68/100
Epoch 68: val_loss did not improve from 0.32334
Epoch 69/100
Epoch 69: val_loss did not improve from 0.32334
Epoch 70/100
Epoch 70: val_loss did not improve from 0.32334
Epoch 71/100
Epoch 71: val_loss did not improve from 0.32334
Epoch 72/100
Epoch 72: val_loss improved from 0.32334 to 0.31671, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 73/100
Epoch 73: 

Epoch 97: val_loss improved from 0.27193 to 0.25635, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\bestModel-fold4.hdf5
Epoch 98/100
Epoch 98: val_loss did not improve from 0.25635
Epoch 99/100
Epoch 99: val_loss did not improve from 0.25635
Epoch 100/100
Epoch 100: val_loss did not improve from 0.25635


## k-fold Training evaluation

In [15]:
evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Test,0.927816,0.891339,0.889294,0.938522,0.891339,0.79906
Train,1.0,1.0,1.0,1.0,1.0,1.0


In [16]:
# batch 64
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.962275	0.964691	0.968595	0.961289	0.964691	0.921092
# Train	0.998285	0.995658	0.999909	1.000000	0.995658	0.996416

In [17]:
# batch 32
# Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.953655	0.947710	0.957286	0.958667	0.947710	0.903487
# Train	0.998284	0.995658	0.999985	1.000000	0.995658	0.996415

In [18]:
# batch 16
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Test	0.957029	0.955322	0.972796	0.958206	0.955322	0.909738
# Train	0.998285	0.995658	0.999977	1.000000	0.995658	0.996416

In [19]:
evaluations_df[evaluations_df["Train_Test"] == "Test"]

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
1,0,Test,0.926316,0.904762,"[0.0, 0.041666666666666664, 0.4583333333333333...","[0.0, 0.0, 0.0, 0.014084507042253521, 0.014084...","[1.9994674, 0.9994673, 0.9968348, 0.99101806, ...",0.872066,0.932432,0.904762,0.799497
3,1,Test,0.957447,0.913043,"[0.0, 0.043478260869565216, 0.8695652173913043...","[0.0, 0.0, 0.0, 0.014084507042253521, 0.014084...","[1.9974787, 0.99747866, 0.92067057, 0.88028693...",0.993876,0.971831,0.913043,0.884874
5,2,Test,0.925532,0.944444,"[0.0, 0.043478260869565216, 0.6956521739130435...","[0.0, 0.0, 0.0, 0.014084507042253521, 0.014084...","[1.9995296, 0.9995296, 0.9187731, 0.6772609, 0...",0.926516,0.921053,0.944444,0.792165
7,3,Test,0.861702,0.777778,"[0.0, 0.043478260869565216, 0.6086956521739131...","[0.0, 0.0, 0.0, 0.2676056338028169, 0.26760563...","[1.9992847, 0.9992848, 0.91563433, 0.03892002,...",0.677893,0.881579,0.777778,0.60349
9,4,Test,0.968085,0.916667,"[0.0, 0.043478260869565216, 0.9565217391304348...","[0.0, 0.0, 0.0, 0.5492957746478874, 0.54929577...","[1.997889, 0.9978891, 0.774578, 0.0012208459, ...",0.976118,0.985714,0.916667,0.915275


# Independent data

## Using k-fold Models

### Performance of each k-fold model

In [20]:
## create the evaluation data structure for all iterations
evaluations = {
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    label_pred = pred2label(y_pred)

    # Compute precision, recall, sensitivity, specifity, mcc
    acc = accuracy_score(indpe_labels, label_pred)
    prec = precision_score(indpe_labels,label_pred)

    conf = confusion_matrix(indpe_labels, label_pred)
    if(conf[0][0]+conf[1][0]):
        sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
    else:
        sens = 0.0
    if(conf[1][1]+conf[0][1]):
        spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
    else:
        spec = 0.0
    if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
        mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
    else:
        mcc= 0.0
    fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
    auc = roc_auc_score(indpe_labels, y_pred)

    evaluations["Fold"].append(i)
    evaluations["Train_Test"].append("Independent")
    evaluations["Accuracy"].append(acc)
    evaluations["Precision"].append(prec)
    evaluations["TPR"].append(tpr)
    evaluations["FPR"].append(fpr)
    evaluations["TPR_FPR_Thresholds"].append(thresholds)
    evaluations["AUC"].append(auc)
    evaluations["Sensitivity"].append(sens)
    evaluations["Specificity"].append(spec)
    evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.912871,0.924495,0.862553,0.912884,0.924495,0.761365


In [21]:
evaluations_df

Unnamed: 0,Fold,Train_Test,Accuracy,Precision,TPR,FPR,TPR_FPR_Thresholds,AUC,Sensitivity,Specificity,MCC
0,0,Independent,0.915842,0.971429,"[0.0, 0.02, 0.68, 0.68, 0.7, 0.7, 0.74, 0.74, ...","[0.0, 0.0, 0.0, 0.006578947368421052, 0.006578...","[1.9996885, 0.9996885, 0.8709528, 0.75188875, ...",0.869474,0.904192,0.971429,0.767894
1,1,Independent,0.925743,1.0,"[0.0, 0.02, 0.7, 0.7, 0.72, 0.72, 0.74, 0.74, ...","[0.0, 0.0, 0.0, 0.039473684210526314, 0.039473...","[1.999047, 0.999047, 0.6875863, 0.16631983, 0....",0.8725,0.91018,1.0,0.798202
2,2,Independent,0.915842,0.945946,"[0.0, 0.02, 0.66, 0.66, 0.72, 0.72, 0.74, 0.74...","[0.0, 0.0, 0.0, 0.013157894736842105, 0.013157...","[1.9992348, 0.9992348, 0.8868928, 0.7917781, 0...",0.857368,0.909091,0.945946,0.766339
3,3,Independent,0.876238,0.755102,"[0.0, 0.02, 0.74, 0.74, 0.76, 0.76, 0.78, 0.78...","[0.0, 0.0, 0.0, 0.14473684210526316, 0.1447368...","[1.9998719, 0.99987185, 0.9158982, 0.27986592,...",0.851579,0.915033,0.755102,0.665578
4,4,Independent,0.930693,0.95,"[0.0, 0.04, 0.74, 0.74, 0.76, 0.76, 0.78, 0.78...","[0.0, 0.0, 0.0, 0.006578947368421052, 0.006578...","[1.9992347, 0.9992347, 0.70704263, 0.66808397,...",0.861842,0.925926,0.95,0.808813


### Mean score with k-fold models

In [22]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    total_pred += y_pred
    all_preds.append(y_pred)
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
if(conf[0][0]+conf[1][0]):
    sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
else:
    sens = 0.0
if(conf[1][1]+conf[0][1]):
    spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
else:
    spec = 0.0
if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
    mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
else:
    mcc= 0.0
fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.935644,1.0,0.876316,0.921212,1.0,0.825649


### Voting score with k-fold models

In [23]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

total_pred = np.zeros(indpe_labels.shape)
all_preds = []

for i in range(n_fold):
    
    current_model_path = os.path.join(modelPath, "bestModel-fold{}.hdf5".format(i))
    model = tf.keras.models.load_model(current_model_path)

    y_pred = model.predict(indpe_features)
    vote_pred = pred2label(y_pred)
    total_pred += vote_pred
    all_preds.append(vote_pred)
    
total_pred = total_pred / n_fold
label_pred = pred2label(total_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
if(conf[0][0]+conf[1][0]):
    sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
else:
    sens = 0.0
if(conf[1][1]+conf[0][1]):
    spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
else:
    spec = 0.0
if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
    mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
else:
    mcc= 0.0
fpr, tpr, thresholds = roc_curve(indpe_labels, total_pred)
auc = roc_auc_score(indpe_labels, total_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.930693,0.973684,0.889079,0.920732,0.973684,0.809928


## Using New Model

Train one model on full data from training. Predict and evaluate on Independent data.

In [24]:
model = DLNN_CORENup(input_seq_shape = input_seq_shape)
    
## Define the model callbacks for early stopping and saving the model. Then train model
current_model_path = os.path.join(modelPath, "_fullModel.hdf5")
modelCallbacks = [
    tf.keras.callbacks.ModelCheckpoint(current_model_path,
                                       monitor = 'val_loss', verbose = 1, save_best_only = True, 
                                       save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
]

# adding random shuffling of the dataset for training purpose
index_arr = np.arange(train_features.shape[0])
index_arr = np.random.permutation(index_arr)

model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
          callbacks = modelCallbacks, validation_data = (indpe_features, indpe_labels))
# model.fit(x = train_features[index_arr], y = train_labels[index_arr], batch_size = batch_size, epochs = epochs, verbose = 1, 
#           callbacks = modelCallbacks, validation_split = 0.2)

model = tf.keras.models.load_model(current_model_path)

Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 1.23890, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.23890 to 1.18821, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 1.18821 to 1.12475, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 1.12475 to 1.07983, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 1.07983 to 1.02851, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 1.02851 to 0.98422, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 7/100
Epoch 7: val_loss improved from 0.9842

Epoch 29/100
Epoch 29: val_loss improved from 0.58305 to 0.57925, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 30/100
Epoch 30: val_loss improved from 0.57925 to 0.55587, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 31/100
Epoch 31: val_loss did not improve from 0.55587
Epoch 32/100
Epoch 32: val_loss did not improve from 0.55587
Epoch 33/100
Epoch 33: val_loss did not improve from 0.55587
Epoch 34/100
Epoch 34: val_loss did not improve from 0.55587
Epoch 35/100
Epoch 35: val_loss improved from 0.55587 to 0.54995, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 36/100
Epoch 36: val_loss improved from 0.54995 to 0.54943, saving model to Results\NT_Site_iNitroY_Classification_DLNN_CORENup\5fold\models\_fullModel.hdf5
Epoch 37/100
Epoch 37: val_loss did not improve from 0.54943
Epoch 38/100
Epoch 38: val_loss improved fr

Epoch 62/100
Epoch 62: val_loss did not improve from 0.47946
Epoch 63/100
Epoch 63: val_loss did not improve from 0.47946
Epoch 64/100
Epoch 64: val_loss did not improve from 0.47946
Epoch 65/100
Epoch 65: val_loss did not improve from 0.47946
Epoch 66/100
Epoch 66: val_loss did not improve from 0.47946
Epoch 67/100
Epoch 67: val_loss did not improve from 0.47946
Epoch 68/100
Epoch 68: val_loss did not improve from 0.47946
Epoch 69/100
Epoch 69: val_loss did not improve from 0.47946
Epoch 70/100
Epoch 70: val_loss did not improve from 0.47946
Epoch 71/100
Epoch 71: val_loss did not improve from 0.47946
Epoch 72/100
Epoch 72: val_loss did not improve from 0.47946
Epoch 73/100
Epoch 73: val_loss did not improve from 0.47946
Epoch 74/100
Epoch 74: val_loss did not improve from 0.47946
Epoch 75/100
Epoch 75: val_loss did not improve from 0.47946
Epoch 76/100
Epoch 76: val_loss did not improve from 0.47946
Epoch 77/100
Epoch 77: val_loss did not improve from 0.47946
Epoch 78/100
Epoch 78: v



In [25]:
## create the evaluation data structure for all iterations
evaluations = {
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

##################################################################################
##### Prediction and metrics for Independent dataset
##################################################################################

y_pred = model.predict(indpe_features)
label_pred = pred2label(y_pred)

# Compute precision, recall, sensitivity, specifity, mcc
acc = accuracy_score(indpe_labels, label_pred)
prec = precision_score(indpe_labels,label_pred)

conf = confusion_matrix(indpe_labels, label_pred)
if(conf[0][0]+conf[1][0]):
    sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
else:
    sens = 0.0
if(conf[1][1]+conf[0][1]):
    spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
else:
    spec = 0.0
if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
    mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
else:
    mcc= 0.0
fpr, tpr, thresholds = roc_curve(indpe_labels, y_pred)
auc = roc_auc_score(indpe_labels, y_pred)

evaluations["Train_Test"].append("Independent")
evaluations["Accuracy"].append(acc)
evaluations["Precision"].append(prec)
evaluations["TPR"].append(tpr)
evaluations["FPR"].append(fpr)
evaluations["TPR_FPR_Thresholds"].append(thresholds)
evaluations["AUC"].append(auc)
evaluations["Sensitivity"].append(sens)
evaluations["Specificity"].append(spec)
evaluations["MCC"].append(mcc)

##################################################################################

evaluations_df = pd.DataFrame.from_dict(evaluations)

evaluations_df_grouped = evaluations_df.groupby(["Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

evaluations_df_grouped

Unnamed: 0_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Train_Test,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Independent,0.915842,0.902439,0.891842,0.919255,0.902439,0.765787


In [26]:
# batch 64
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Independent	0.98008	0.979381	0.987061	0.980519	0.979381	0.958107

In [27]:
# batch 32
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Independent	0.960159	0.94	0.985928	0.97351	0.94	0.916733

In [28]:
# batch: 16
# 	Accuracy	Precision	AUC	Sensitivity	Specificity	MCC
# Train_Test						
# Independent	0.956175	0.957895	0.953381	0.955128	0.957895	0.90771

In [29]:
# out_fasta_file_name = '.'.join((training_data_file.split('.')[0], 'fasta'))
# out_fasta_file_path = os.path.join(input_data_folder, out_fasta_file_name)

# count = 0
# list_seqs = list(train_data['Sequence'])

# with open(out_fasta_file_path, "w") as out_file_obj:
#     for strLine in list_seqs:
        
#         #Output the header
#         out_file_obj.write(">" + str(count+1) + "\n")
#         out_file_obj.write(strLine + "\n")
        
#         count += 1