In [1]:
##################################################################################
##### Define all parameters for model tuning
##################################################################################

n_fold = 10
expName = "PSI_Site_Test_Run_current_setting1"
outPath = "Results"
foldName = "folds.pickle"

# modelNames = ["DLNN_3", "DLNN_5"]

epochs = 200
batch_size = 64
shuffle = False
seed = None


input_data_folder = "Data\\Psi_Site_Chen"

In [2]:
import os 
from Bio import SeqIO
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, confusion_matrix
from sklearn.metrics import roc_auc_score

import math

In [3]:
# print(tf.test.is_gpu_available(cuda_only=True))
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
##################################################################################
##### define all CUSTOM functions
##################################################################################

def one_hot_encode_dna(sequence):
    
    seq_encoded = np.zeros((len(sequence),4))
    dict_nuc = {
        "A": 0,
        "C": 1,
        "G": 2,
        "T":3
    }
    i = 0
    
    for single_character in sequence:
        if(single_character.upper() in dict_nuc.keys()):
            seq_encoded[i][dict_nuc[single_character.upper()]] = 1
            i = i+1
        else:
            return []
    
    return seq_encoded

def one_hot_encode_rna(sequence):
    
    seq_encoded = np.zeros((len(sequence),4))
    dict_nuc = {
        "A": 0,
        "C": 1,
        "G": 2,
        "U":3
    }
    i = 0
    
    for single_character in sequence:
        if(single_character.upper() in dict_nuc.keys()):
            seq_encoded[i][dict_nuc[single_character.upper()]] = 1
            i = i+1
        else:
            return []
    
    return seq_encoded

In [5]:
##################################################################################
##### define evaluator functions
##################################################################################

## Build the K-fold from dataset
def build_kfold(features, labels, k=10, shuffle=False, seed=None):
    
    skf = StratifiedKFold(n_splits=k, shuffle=shuffle, random_state=seed)
    kfoldList = []
    for train_index, test_index in skf.split(features, labels):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        kfoldList.append({
            "X_train": X_train,
            "X_test": X_test,
            "y_train":y_train,
            "y_test":y_test
        })
    return kfoldList

def pred2label(y_pred):
    y_pred = np.round(np.clip(y_pred, 0, 1))
    return y_pred

In [6]:
##################################################################################
##### Function to customize the DLNN architecture with parameters
##################################################################################

def DLNN_CORENup(input_shape = (21,4),
                   conv_filters_per_layer_1 = 50, kernel_length_1 = 5, conv_strides_1 = 1, ## 1st Convolutional layer parameters
                   max_pool_width_1 = 2, max_pool_stride_1 = 2, ## 1st Maxpool layer parameters
                   lstm_decode_units = 50, ## LSTM layer parameters
                   conv_filters_per_layer_2 = 50,  kernel_length_2 = 10, conv_strides_2 = 1, ## 2nd Convolutional layer parameters
                   max_pool_width_2 = 2, max_pool_stride_2 = 2, ## 2nd Maxpool layer parameters
                   dense_decode_units = 370, ## Dense layer parameters
                   prob = 0.5, learn_rate = 0.0003, loss = 'binary_crossentropy', metrics = None):
    
    beta = 0.001
    
    input1 = tf.keras.layers.Input(shape=input_shape)

    x1 = tf.keras.layers.Conv1D(conv_filters_per_layer_1, kernel_length_1, input_shape = input_shape, 
                                strides = conv_strides_1, kernel_regularizer = tf.keras.regularizers.l2(beta), 
                                padding = "same")(input1)
    x1 = tf.keras.layers.Activation('relu')(x1)
    x1 = tf.keras.layers.MaxPool1D(pool_size = max_pool_width_1, strides = max_pool_stride_1)(x1)
    x1 = tf.keras.layers.Dropout(prob)(x1)

    ## LSTM Path

    x2 = tf.keras.layers.LSTM(lstm_decode_units, return_sequences = True, 
                              kernel_regularizer = tf.keras.regularizers.l2(beta))(x1)
    x2 = tf.keras.layers.Dropout(prob)(x2)
    
    x2 = tf.keras.layers.Flatten()(x2)

    ## Conv Path

    x3 = tf.keras.layers.Conv1D(conv_filters_per_layer_2, kernel_length_2, strides = conv_strides_2, 
                                kernel_regularizer = tf.keras.regularizers.l2(beta), padding = 'same')(x1)
    x3 = tf.keras.layers.Activation('relu')(x3)
    x3 = tf.keras.layers.MaxPooling1D(pool_size = max_pool_width_2, strides = max_pool_stride_2)(x3)
    x3 = tf.keras.layers.Dropout(prob)(x3)
    
    x3 = tf.keras.layers.Flatten()(x3)

    ## Fully connected Layers

    y = tf.keras.layers.Concatenate(1)([x2,x3])
    
    y1 = tf.keras.layers.Dense(dense_decode_units, kernel_regularizer = tf.keras.regularizers.l2(beta), activation = 'relu')(y)
    
    y1 = tf.keras.layers.Dropout(prob)(y1)
    
    y1 = tf.keras.layers.Dense(1, kernel_regularizer = tf.keras.regularizers.l2(beta), activation = 'sigmoid')(y1)

    ## Generate Model from input and output
    model = tf.keras.models.Model(inputs=[input1], outputs=y1)
    
    ## Compile model
    if(metrics != None):
        model.compile(optimizer = tf.keras.optimizers.Adam(lr=learn_rate), loss = loss, metrics = metrics)
    else:
        model.compile(optimizer = tf.keras.optimizers.Adam(lr=learn_rate), loss = loss)

    return model

In [7]:
# input_data_folder = os.path.join("Data", "Psi_Site_Chen")

# for root, dirs, files in os.walk(input_data_folder):
#     for file in files:
#         print(os.path.join(root, file))

In [8]:
# input_data_file = "Data\Psi_Site_Chen\HS_990.txt"

In [9]:
# current_dataset_variety = input_data_file.split("\\")[-1].split(".")[0]

# openFile = open(input_data_file)
# fastaSequences = SeqIO.parse(openFile, "fasta")

In [10]:
# ##################################################################################
# ##### extract data from the current fasta file
# ##################################################################################

# positive_List = []
# negative_List = []
# positive_onehotencoded_List = []
# negative_onehotencoded_List = []

# for fasta in fastaSequences: 
#     name, sequence = fasta.id, str(fasta.seq)
#     if "P" in name:
#         positive_List.append(sequence)
#         aus_seq = one_hot_encode_rna(sequence)
#         if(len(aus_seq) != 0):
#             positive_onehotencoded_List.append(aus_seq)
#     elif "N" in name:
#         negative_List.append(sequence)
#         aus_seq = one_hot_encode_rna(sequence)
#         if(len(aus_seq) != 0):
#             negative_onehotencoded_List.append(aus_seq)
            
# openFile.close()
        
# print("\n======================================================================")
# print("\nFile: "+os.path.join(root, file))
# print("Positive: "+str(len(positive_onehotencoded_List)))
# print("Negative: "+str(len(negative_onehotencoded_List)))

In [11]:
# ##################################################################################
# ##### Generate Folds from dataset, and store to file
# ##################################################################################

# ## create the features and labels datasets for the training
# input_size = (len(positive_onehotencoded_List[1]), 4)
# labels = np.concatenate((np.ones((len(positive_onehotencoded_List), 1), dtype=np.float32), np.zeros((len(negative_onehotencoded_List), 1), dtype=np.float32)), axis=0)
# features = np.concatenate((positive_onehotencoded_List,negative_onehotencoded_List), 0)

# ## Generate the k-fold dataset
# folds = build_kfold(features, labels, k=n_fold, shuffle=shuffle, seed=seed)

# ## Write the k-fold dataset to file
# foldPath = os.path.join(outPath, expName, current_dataset_variety, "{}fold".format(n_fold))
# if(not os.path.isdir(foldPath)):
#     os.makedirs(foldPath)
# pickle.dump(folds, open(os.path.join(foldPath, foldName), "wb"))

In [12]:
##################################################################################
##### For each input file, train model and generate different outputs in a structured folder
##################################################################################

## create the evaluation data structure for all iterations
evaluations = {
    "Model" : [],
    "Kernel_Length" : [],
    "Dataset" : [],
    "Fold" : [],
    "Train_Test" : [],
    "Accuracy" : [],
    "Precision": [],
    "TPR": [],
    "FPR": [],
    "TPR_FPR_Thresholds": [],
    "AUC": [],
    "Sensitivity": [],
    "Specificity": [],
    "MCC":[]
}

In [15]:
for root, dirs, files in os.walk(input_data_folder):
    for file in files:
        
        input_data_file = os.path.join(root, file)
        
        current_dataset_variety = input_data_file.split("\\")[-1].split(".")[0]
        
        openFile = open(input_data_file)
        fastaSequences = SeqIO.parse(openFile, "fasta")
        
        ##################################################################################
        ##### extract data from the current fasta file
        ##################################################################################

        positive_List = []
        negative_List = []
        positive_onehotencoded_List = []
        negative_onehotencoded_List = []

        for fasta in fastaSequences: 
            name, sequence = fasta.id, str(fasta.seq)
            if "P" in name:
                positive_List.append(sequence)
                aus_seq = one_hot_encode_rna(sequence)
                if(len(aus_seq) != 0):
                    positive_onehotencoded_List.append(aus_seq)
            elif "N" in name:
                negative_List.append(sequence)
                aus_seq = one_hot_encode_rna(sequence)
                if(len(aus_seq) != 0):
                    negative_onehotencoded_List.append(aus_seq)

        openFile.close()

        print("\n======================================================================")
        print("\nFile: "+os.path.join(root, file))
        print("Positive: "+str(len(positive_onehotencoded_List)))
        print("Negative: "+str(len(negative_onehotencoded_List)))
        
        ##################################################################################
        ##### Generate Folds from dataset, and store to file
        ##################################################################################

        ## create the features and labels datasets for the training
        input_size = (len(positive_onehotencoded_List[1]), 4)
        labels = np.concatenate((np.ones((len(positive_onehotencoded_List), 1), dtype=np.float32), np.zeros((len(negative_onehotencoded_List), 1), dtype=np.float32)), axis=0)
        features = np.concatenate((positive_onehotencoded_List,negative_onehotencoded_List), 0)

        ## Generate the k-fold dataset
        folds = build_kfold(features, labels, k=n_fold, shuffle=shuffle, seed=seed)

        ## Write the k-fold dataset to file
        foldPath = os.path.join(outPath, expName, current_dataset_variety, "{}fold".format(n_fold))
        if(not os.path.isdir(foldPath)):
            os.makedirs(foldPath)
        pickle.dump(folds, open(os.path.join(foldPath, foldName), "wb"))

        ## Create and set directory to save model
        modelPath = os.path.join(outPath, expName, current_dataset_variety, "{}fold".format(n_fold), "models")
        if(not os.path.isdir(modelPath)):
            os.makedirs(modelPath)
            
        ##################################################################################
        ##### TRAIN and PREDICT for every Fold, using models
        ##################################################################################

        # fold counter
        i = 0

        for fold in folds:

            print("\nTrain/Test model "+current_dataset_variety+" on Fold #"+str(i)+".")

            kernel_length = 3
            ## Generate model using function
#             model = Conv_LSTM_DLNN(input_shape = input_size, conv_filters_per_layer = 50, kernel_length = kernel_length, 
#                                    lstm_decode_units = 50, max_pool_width = 2, max_pool_stride = 2, dense_decode_units = 50,
#                                    learn_rate = 0.0001, prob = 0.5, loss='binary_crossentropy', metrics=None)

            model = DLNN_CORENup(input_shape = input_size)

            ## Define the model callbacks for early stopping and saving the model. Then train model
            modelCallbacks = [
                tf.keras.callbacks.ModelCheckpoint(os.path.join(modelPath, "{}_bestModel-fold{}.hdf5".format(current_dataset_variety, i)),
                                                   monitor = 'val_loss', verbose = 0, save_best_only = True, 
                                                   save_weights_only = False, mode = 'auto', save_freq = 'epoch'),
                tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 5, verbose = 0, 
                                                 mode = 'auto', baseline = None, restore_best_weights = False)
            ]
            model.fit(x = fold["X_train"], y = fold["y_train"], batch_size = batch_size, epochs = epochs, verbose = 0, 
                      callbacks = modelCallbacks, validation_data = (fold["X_test"], fold["y_test"]))

            ##################################################################################
            ##### Prediction and metrics for TRAIN dataset
            ##################################################################################

            y_pred = model.predict(fold["X_train"])
            label_pred = pred2label(y_pred)
            # Compute precision, recall, sensitivity, specifity, mcc
            acc = accuracy_score(fold["y_train"], label_pred)
            prec = precision_score(fold["y_train"],label_pred)

            conf = confusion_matrix(fold["y_train"], label_pred)
            if(conf[0][0]+conf[1][0]):
                sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
            else:
                sens = 0.0
            if(conf[1][1]+conf[0][1]):
                spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
            else:
                spec = 0.0
            if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
                mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
            else:
                mcc= 0.0
            fpr, tpr, thresholds = roc_curve(fold["y_train"], y_pred)
            auc = roc_auc_score(fold["y_train"], y_pred)

            evaluations["Model"].append(current_dataset_variety)
            evaluations["Kernel_Length"].append(kernel_length)
            evaluations["Dataset"].append(current_dataset_variety)
            evaluations["Fold"].append(i)
            evaluations["Train_Test"].append("Train")
            evaluations["Accuracy"].append(acc)
            evaluations["Precision"].append(prec)
            evaluations["TPR"].append(tpr)
            evaluations["FPR"].append(fpr)
            evaluations["TPR_FPR_Thresholds"].append(thresholds)
            evaluations["AUC"].append(auc)
            evaluations["Sensitivity"].append(sens)
            evaluations["Specificity"].append(spec)
            evaluations["MCC"].append(mcc)

            ##################################################################################
            ##### Prediction and metrics for TEST dataset
            ##################################################################################

            y_pred = model.predict(fold["X_test"])
            label_pred = pred2label(y_pred)
            # Compute precision, recall, sensitivity, specifity, mcc
            acc = accuracy_score(fold["y_test"], label_pred)
            prec = precision_score(fold["y_test"],label_pred)

            conf = confusion_matrix(fold["y_test"], label_pred)
            if(conf[0][0]+conf[1][0]):
                sens = float(conf[0][0])/float(conf[0][0]+conf[1][0])
            else:
                sens = 0.0
            if(conf[1][1]+conf[0][1]):
                spec = float(conf[1][1])/float(conf[1][1]+conf[0][1])
            else:
                spec = 0.0
            if((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0])):
                mcc = (float(conf[0][0])*float(conf[1][1]) - float(conf[1][0])*float(conf[0][1]))/math.sqrt((conf[0][0]+conf[0][1])*(conf[0][0]+conf[1][0])*(conf[1][1]+conf[0][1])*(conf[1][1]+conf[1][0]))
            else:
                mcc= 0.0
            fpr, tpr, thresholds = roc_curve(fold["y_test"], y_pred)
            auc = roc_auc_score(fold["y_test"], y_pred)

            evaluations["Model"].append(current_dataset_variety)
            evaluations["Kernel_Length"].append(kernel_length)
            evaluations["Dataset"].append(current_dataset_variety)
            evaluations["Fold"].append(i)
            evaluations["Train_Test"].append("Test")
            evaluations["Accuracy"].append(acc)
            evaluations["Precision"].append(prec)
            evaluations["TPR"].append(tpr)
            evaluations["FPR"].append(fpr)
            evaluations["TPR_FPR_Thresholds"].append(thresholds)
            evaluations["AUC"].append(auc)
            evaluations["Sensitivity"].append(sens)
            evaluations["Specificity"].append(spec)
            evaluations["MCC"].append(mcc)

            i = i+1
            del model
            tf.keras.backend.clear_session()

        ##################################################################################
        ##### Dump evaluations to a file
        ##################################################################################

        evalPath = os.path.join(outPath, expName, "_Evaluation_All_Datasets")
        if(not os.path.isdir(evalPath)):
            os.makedirs(evalPath)

        pickle.dump(evaluations,
                    open(os.path.join(evalPath, "{}fold_evaluations.pickle".format(n_fold)), "wb"))



File: Data\Psi_Site_Chen\HS_990.txt
Positive: 495
Negative: 495

Train/Test model HS_990 on Fold #0.

Train/Test model HS_990 on Fold #1.

Train/Test model HS_990 on Fold #2.

Train/Test model HS_990 on Fold #3.

Train/Test model HS_990 on Fold #4.

Train/Test model HS_990 on Fold #5.

Train/Test model HS_990 on Fold #6.

Train/Test model HS_990 on Fold #7.

Train/Test model HS_990 on Fold #8.

Train/Test model HS_990 on Fold #9.


File: Data\Psi_Site_Chen\H_200.txt
Positive: 100
Negative: 100

Train/Test model H_200 on Fold #0.

Train/Test model H_200 on Fold #1.

Train/Test model H_200 on Fold #2.

Train/Test model H_200 on Fold #3.

Train/Test model H_200 on Fold #4.

Train/Test model H_200 on Fold #5.

Train/Test model H_200 on Fold #6.

Train/Test model H_200 on Fold #7.

Train/Test model H_200 on Fold #8.

Train/Test model H_200 on Fold #9.


File: Data\Psi_Site_Chen\MM_944.txt
Positive: 472
Negative: 472

Train/Test model MM_944 on Fold #0.

Train/Test model MM_944 on Fold #1.

## Visualization of Evaluation

In [None]:
##################################################################################
##### Add import statement here, to make this next part of code standalone executable
##################################################################################

import os
import pickle
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter
import numpy as np
import pandas as pd


In [16]:
##################################################################################
##### Load file and convert to dataframe for easy manipulation
##################################################################################

evalPath = os.path.join(outPath, expName, "_Evaluation_All_Datasets")
if(not os.path.isdir(evalPath)):
    os.makedirs(evalPath)

evaluations = pickle.load(open(os.path.join(evalPath, "{}fold_evaluations.pickle".format(n_fold)), "rb"))

evaluations_df = pd.DataFrame.from_dict(evaluations)

In [17]:
# evaluations["Model"] = evaluations["Model"][0:20]
# evaluations_df = pd.DataFrame.from_dict(evaluations)

In [18]:
##################################################################################
##### Group dataset (mean of metrics) by [Dataset, Model, Train_Test] combinations
##################################################################################

evaluations_df_grouped = evaluations_df.groupby(["Dataset", 
                                                 "Model", 
                                                 "Train_Test"]).mean().filter(['Accuracy', 
                                                                               'Precision', 
                                                                               'AUC', 
                                                                               'Sensitivity', 
                                                                               'Specificity', 
                                                                               'MCC'])

# DLNN_3 = evaluations_df_grouped[np.in1d(evaluations_df_grouped.index.get_level_values(1), ['DLNN_3'])]
# DLNN_5 = evaluations_df_grouped[np.in1d(evaluations_df_grouped.index.get_level_values(1), ['DLNN_5'])]

# DLNN_3_Train = DLNN_3[np.in1d(DLNN_3.index.get_level_values(2), ['Train'])]
# DLNN_3_Test = DLNN_3[np.in1d(DLNN_3.index.get_level_values(2), ['Test'])]

# DLNN_5_Train = DLNN_5[np.in1d(DLNN_5.index.get_level_values(2), ['Train'])]
# DLNN_5_Test = DLNN_5[np.in1d(DLNN_5.index.get_level_values(2), ['Test'])]

In [19]:
evaluations_df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Accuracy,Precision,AUC,Sensitivity,Specificity,MCC
Dataset,Model,Train_Test,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HS_990,HS_990,Test,0.589899,0.586619,0.645673,0.595828,0.586619,0.181047
HS_990,HS_990,Train,0.799551,0.799567,0.883722,0.806186,0.799567,0.602392
H_200,H_200,Test,0.655,0.657997,0.728,0.669094,0.657997,0.31839
H_200,H_200,Train,0.917778,0.919615,0.972506,0.917395,0.919615,0.836282
MM_944,MM_944,Test,0.675946,0.659468,0.741159,0.707424,0.659468,0.359433
MM_944,MM_944,Train,0.831449,0.796573,0.906614,0.876891,0.796573,0.668146
SS_628,SS_628,Test,0.659242,0.651863,0.705248,0.671126,0.651863,0.320547
SS_628,SS_628,Train,0.89083,0.880277,0.960674,0.903728,0.880277,0.782827
S_200,S_200,Test,0.72,0.744423,0.793,0.738122,0.744423,0.460054
S_200,S_200,Train,0.917778,0.903246,0.96979,0.934233,0.903246,0.836516


In [None]:
# ##################################################################################
# ##### Decide on metric to visualize
# ##################################################################################

# print("Metrics Available : ")
# print(list(evaluations_df_grouped.columns))

#### Select a metric to plot below:

In [None]:
# metric_to_plot = "Accuracy"

In [None]:
# ##################################################################################
# ##### Visualize with a multiple Bar chart
# ##################################################################################

# x = np.arange(len(DLNN_3_Train[metric_to_plot]))
# width = 0.15

# fig, ax = plt.subplots(figsize=(17,6))
# rects1 = ax.bar(x - (4*(width/2)), round(DLNN_3_Train[metric_to_plot]*100, 3), width, label='DLNN_3, Train')
# rects2 = ax.bar(x - (1.5*(width/2)), round(DLNN_5_Train[metric_to_plot]*100, 3), width, label='DLNN_5, Train')
# rects3 = ax.bar(x + (1.5*(width/2)), round(DLNN_3_Test[metric_to_plot]*100, 3), width, label='DLNN_3, Test')
# rects4 = ax.bar(x + (4*(width/2)), round(DLNN_5_Test[metric_to_plot]*100, 3), width, label='DLNN_5, Test')

# ## Custom y-axis tick labels
# ax.set_ylabel(metric_to_plot)
# ax.set_ylim([(math.floor(min(evaluations_df_grouped[metric_to_plot])*10)-1)*10, 
#             (math.ceil(max(evaluations_df_grouped[metric_to_plot])*10)+1)*10])
# # ax.set_ylim([80, 105])

# ## Custom x-axis tick labels
# ax.set_xticks(x)
# # ax.set_xticklabels(DLNN_3_Train.index.get_level_values(0))
# # ax.set_xticklabels([m+" - "+str(n) for m,n in 
# #                         zip(DLNN_3_Train.index.get_level_values(0),DLNN_3_Train.index.get_level_values(1))],
# #                   rotation=30)
# ax.set_xticklabels(DLNN_3_Train.index.get_level_values(0))

# ax.set_title(metric_to_plot+' by Dataset, Model, Train/Test')
# ax.legend(loc='upper left')

# def autolabel(rects):
#     for rect in rects:
#         height = rect.get_height()
#         ax.annotate('{}'.format(height),
#                     xy=(rect.get_x() + rect.get_width() / 2, height),
#                     xytext=(0, 3),  # 3 points vertical offset
#                     textcoords="offset points", 
#                     ha='center', va='bottom', rotation=90)

# autolabel(rects1)
# autolabel(rects2)
# autolabel(rects3)
# autolabel(rects4)

# plt.show()

### Store all metrics' plots to file

In [None]:
# ##################################################################################
# ##### Iteratively generate comparison plot using every metric
# ##################################################################################

# for metric_to_plot in list(evaluations_df_grouped.columns):
    
#     x = np.arange(len(DLNN_3_Train[metric_to_plot]))
#     width = 0.15

#     fig, ax = plt.subplots(figsize=(17,6))
#     rects1 = ax.bar(x - (4*(width/2)), round(DLNN_3_Train[metric_to_plot]*100, 3), width, label='DLNN_3, Train')
#     rects2 = ax.bar(x - (1.5*(width/2)), round(DLNN_5_Train[metric_to_plot]*100, 3), width, label='DLNN_5, Train')
#     rects3 = ax.bar(x + (1.5*(width/2)), round(DLNN_3_Test[metric_to_plot]*100, 3), width, label='DLNN_3, Test')
#     rects4 = ax.bar(x + (4*(width/2)), round(DLNN_5_Test[metric_to_plot]*100, 3), width, label='DLNN_5, Test')

#     ## Custom y-axis tick labels
#     ax.set_ylabel(metric_to_plot)
#     ax.set_ylim([(math.floor(min(evaluations_df_grouped[metric_to_plot])*10)-1)*10, 
#                 (math.ceil(max(evaluations_df_grouped[metric_to_plot])*10)+1)*10])
#     # ax.set_ylim([80, 105])

#     ## Custom x-axis tick labels
#     ax.set_xticks(x)
#     # ax.set_xticklabels(DLNN_3_Train.index.get_level_values(0))
#     # ax.set_xticklabels([m+" - "+str(n) for m,n in 
#     #                         zip(DLNN_3_Train.index.get_level_values(0),DLNN_3_Train.index.get_level_values(1))],
#     #                   rotation=30)
#     ax.set_xticklabels(DLNN_3_Train.index.get_level_values(0))

#     ax.set_title(metric_to_plot+' by Dataset, Model, Train/Test')
#     ax.legend(loc='upper left')

#     def autolabel(rects):
#         for rect in rects:
#             height = rect.get_height()
#             ax.annotate('{}'.format(height),
#                         xy=(rect.get_x() + rect.get_width() / 2, height),
#                         xytext=(0, 3),  # 3 points vertical offset
#                         textcoords="offset points", 
#                         ha='center', va='bottom', rotation=90)

#     autolabel(rects1)
#     autolabel(rects2)
#     autolabel(rects3)
#     autolabel(rects4)
    
#     plt.savefig(os.path.join(evalPath, "{}_DLNN_Comparison".format(metric_to_plot)))
#     plt.close()
    