# Normalised confusion matrix value calculator

### Producing confusion matrices for each tested model, averaged over each imputation fold

Shubhayu Bhattacharyay
<br>
Ari Ercole

## I. Initialization

### Import necessary packages

In [1]:
# Fundamental methods
import os
import sys
import time
import glob
import random
import warnings
import itertools
import numpy as np
import pandas as pd
import pickle as cp
import seaborn as sns
from scipy import stats
from pathlib import Path
import matplotlib.pyplot as plt
warnings.filterwarnings(action="ignore")

# Keras and tensorflow methods
import keras
from keras.utils import to_categorical
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Activation, Dense, Dropout, Conv2D, Flatten, LSTM, Permute, Reshape, AlphaDropout, BatchNormalization
import tensorflow as tf

# SciKit-Learn methods
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import StratifiedShuffleSplit 

## II. Confusion matrices of each model type

In [None]:
# Read the compiled repeated-CV predictions produced by each of the four model variants
compiled_mnlr_results = pd.read_csv('../repeated_cv/compiled_predictions/mnlr.csv')
compiled_polr_results = pd.read_csv('../repeated_cv/compiled_predictions/polr.csv')
compiled_deepMN_results = pd.read_csv('../repeated_cv/compiled_predictions/deepMN.csv')
compiled_deepOR_results = pd.read_csv('../repeated_cv/compiled_predictions/deepOR.csv')

# Read the banned tuning indices of DeepMN and DeepOR, keeping only the rows whose
# `class` entry is 'macro-averaged'
deepMN_banned_tuning_indices = (
    pd.read_csv('../repeated_cv/deepMN_banned_tuning_indices.csv')
    .loc[lambda banned: banned['class'] == 'macro-averaged']
)
deepOR_banned_tuning_indices = (
    pd.read_csv('../repeated_cv/deepOR_banned_tuning_indices.csv')
    .loc[lambda banned: banned['class'] == 'macro-averaged']
)

# Drop every prediction row that belongs to a banned tuning index
compiled_deepMN_results = compiled_deepMN_results.loc[
    ~compiled_deepMN_results['tune_idx'].isin(deepMN_banned_tuning_indices['tune_idx'])
]
compiled_deepOR_results = compiled_deepOR_results.loc[
    ~compiled_deepOR_results['tune_idx'].isin(deepOR_banned_tuning_indices['tune_idx'])
]

# Tuning indices that survive the filter and are therefore considered for AUROC calculation
viable_deepMN_tune_indices = compiled_deepMN_results['tune_idx'].unique()
viable_deepOR_tune_indices = compiled_deepOR_results['tune_idx'].unique()

# Names of the probability-score columns, taken from the DeepMN frame
# (the same names are later used to index the other three frames as well)
prob_cols = [name for name in compiled_deepMN_results.columns if name.startswith('prob_GOSE_')]

# First calculate the macro-averaged AUROC within each repeat and fold combination for each model type.
# Result rows are gathered in plain lists and converted to dataframes once at the end:
# `DataFrame.append` was removed in pandas 2.0, and before that it copied the whole frame on every call.

# Outcome categories as coded in the true-label columns.
# Assumes `prob_cols` lists the probability columns in this same order -- TODO confirm.
GOSE_CLASSES = [1, 3, 4, 5, 6, 7, 8]


def _macro_auroc(results, label_col):
    """Return the macro-averaged AUROC for one slice of compiled predictions.

    Parameters
    ----------
    results : pandas.DataFrame
        Prediction rows of a single repeat / fold / configuration combination.
        Must contain `label_col` and every column named in the module-level `prob_cols`.
    label_col : str
        Name of the column holding the true labels ('true_labels' for the deep
        models, 'true.labels' for MNLR and POLR).

    Returns
    -------
    float
        Value returned by `roc_auc_score` with `average='macro'`.
    """
    # One indicator column per outcome category, so each category is scored one-vs-rest
    true_indicators = label_binarize(results[label_col].astype('int').values, classes=GOSE_CLASSES)
    return roc_auc_score(y_true=true_indicators,
                         y_score=results[prob_cols].values,
                         average='macro',
                         multi_class='ovr')


deepMN_auroc_rows = []
deepOR_auroc_rows = []
mnlr_auroc_rows = []
polr_auroc_rows = []

for curr_repeat_idx in compiled_deepMN_results['repeat.name'].unique():
    # The deep-model frames index repeats numerically; the MNLR/POLR frames use names such as 'Repeat01'
    curr_repeat_name = 'Repeat' + str(int(curr_repeat_idx)).zfill(2)

    # Slice each frame down to the current repeat once instead of re-filtering it in every inner iteration
    deepMN_repeat = compiled_deepMN_results[compiled_deepMN_results['repeat.name'] == curr_repeat_idx]
    deepOR_repeat = compiled_deepOR_results[compiled_deepOR_results['repeat.name'] == curr_repeat_idx]
    mnlr_repeat = compiled_mnlr_results[compiled_mnlr_results['repeat.name'] == curr_repeat_name]
    polr_repeat = compiled_polr_results[compiled_polr_results['repeat.name'] == curr_repeat_name]

    # Folds are enumerated from the DeepMN results of the current repeat
    for curr_fold_idx in deepMN_repeat['fold.name'].unique():
        # No zero padding for fold names (the original `.zfill(1)` never padded anything)
        curr_fold_name = 'Fold' + str(int(curr_fold_idx))

        deepMN_fold = deepMN_repeat[deepMN_repeat['fold.name'] == curr_fold_idx]
        deepOR_fold = deepOR_repeat[deepOR_repeat['fold.name'] == curr_fold_idx]
        mnlr_fold = mnlr_repeat[mnlr_repeat['fold.name'] == curr_fold_name]
        polr_fold = polr_repeat[polr_repeat['fold.name'] == curr_fold_name]

        for curr_deepMN_tune_idx in viable_deepMN_tune_indices:
            curr_deepMN_results = deepMN_fold[deepMN_fold['tune_idx'] == curr_deepMN_tune_idx]
            deepMN_auroc_rows.append({'repeat.name': curr_repeat_name,
                                      'fold.name': curr_fold_name,
                                      'tune_idx': curr_deepMN_tune_idx,
                                      'AUROC': _macro_auroc(curr_deepMN_results, 'true_labels')})

        for curr_deepOR_tune_idx in viable_deepOR_tune_indices:
            curr_deepOR_results = deepOR_fold[deepOR_fold['tune_idx'] == curr_deepOR_tune_idx]
            deepOR_auroc_rows.append({'repeat.name': curr_repeat_name,
                                      'fold.name': curr_fold_name,
                                      'tune_idx': curr_deepOR_tune_idx,
                                      'AUROC': _macro_auroc(curr_deepOR_results, 'true_labels')})

        for curr_SMOTE in [0, 1]:
            curr_mnlr_results = mnlr_fold[mnlr_fold['SMOTE'] == curr_SMOTE]
            mnlr_auroc_rows.append({'repeat.name': curr_repeat_name,
                                    'fold.name': curr_fold_name,
                                    'SMOTE': curr_SMOTE,
                                    'AUROC': _macro_auroc(curr_mnlr_results, 'true.labels')})

            curr_polr_results = polr_fold[polr_fold['SMOTE'] == curr_SMOTE]
            polr_auroc_rows.append({'repeat.name': curr_repeat_name,
                                    'fold.name': curr_fold_name,
                                    'SMOTE': curr_SMOTE,
                                    'AUROC': _macro_auroc(curr_polr_results, 'true.labels')})

# Build each result frame in a single step; passing `columns` keeps the expected header even when no rows were collected.
# NOTE(review): without the float-typed empty starter frame, `tune_idx` / `SMOTE` may now be written as
# integers (e.g. `3` rather than `3.0`) -- confirm downstream readers do not depend on the old formatting.
deepMN_AUROCs = pd.DataFrame(deepMN_auroc_rows, columns=['repeat.name', 'fold.name', 'tune_idx', 'AUROC'])
deepOR_AUROCs = pd.DataFrame(deepOR_auroc_rows, columns=['repeat.name', 'fold.name', 'tune_idx', 'AUROC'])
mnlr_AUROCs = pd.DataFrame(mnlr_auroc_rows, columns=['repeat.name', 'fold.name', 'SMOTE', 'AUROC'])
polr_AUROCs = pd.DataFrame(polr_auroc_rows, columns=['repeat.name', 'fold.name', 'SMOTE', 'AUROC'])

# Save the per-repeat, per-fold AUROCs of every model type
deepMN_AUROCs.to_csv('../metrics/deepMN_repeatedCV_aurocs.csv', index=False)
deepOR_AUROCs.to_csv('../metrics/deepOR_repeatedCV_aurocs.csv', index=False)
mnlr_AUROCs.to_csv('../metrics/mnlr_repeatedCV_aurocs.csv', index=False)
polr_AUROCs.to_csv('../metrics/polr_repeatedCV_aurocs.csv', index=False)

# Second, calculate the optimal model configuration for each repeat


def _best_config_per_repeat(fold_aurocs, config_col):
    """Average AUROC over folds and pick the best configuration of every repeat.

    Parameters
    ----------
    fold_aurocs : pandas.DataFrame
        One row per repeat / fold / configuration with columns 'repeat.name',
        `config_col` and 'AUROC'.
    config_col : str
        Column identifying the configuration ('tune_idx' or 'SMOTE').

    Returns
    -------
    tuple of pandas.DataFrame
        `(repeat_means, optimal)`: the mean AUROC of every configuration within
        every repeat, and the subset of those rows whose mean equals the maximum
        of their repeat, sorted by 'repeat.name' with a fresh index. Tied
        configurations are all retained.
    """
    repeat_means = fold_aurocs.groupby([config_col, 'repeat.name'], as_index=False)[['AUROC']].mean()
    # Use the string alias: passing the Python builtin `max` to `transform` is deprecated in
    # recent pandas releases and is slated to stop dispatching to the groupby implementation.
    repeat_best = repeat_means.groupby(['repeat.name'])['AUROC'].transform('max')
    optimal = repeat_means[repeat_best == repeat_means['AUROC']]
    return repeat_means, optimal.sort_values(by=['repeat.name']).reset_index(drop=True)


deepMN_repeats_AUROCs, opt_deepMN_AUROCs = _best_config_per_repeat(deepMN_AUROCs, 'tune_idx')
deepOR_repeats_AUROCs, opt_deepOR_AUROCs = _best_config_per_repeat(deepOR_AUROCs, 'tune_idx')
mnlr_repeats_AUROCs, opt_mnlr_AUROCs = _best_config_per_repeat(mnlr_AUROCs, 'SMOTE')
polr_repeats_AUROCs, opt_polr_AUROCs = _best_config_per_repeat(polr_AUROCs, 'SMOTE')

# Save the optimal configuration(s) of every repeat for each model type
opt_deepMN_AUROCs.to_csv('../metrics/deepMN_optimal_repeatedCV_aurocs.csv', index=False)
opt_deepOR_AUROCs.to_csv('../metrics/deepOR_optimal_repeatedCV_aurocs.csv', index=False)
opt_mnlr_AUROCs.to_csv('../metrics/mnlr_optimal_repeatedCV_aurocs.csv', index=False)
opt_polr_AUROCs.to_csv('../metrics/polr_optimal_repeatedCV_aurocs.csv', index=False)

# Loop through the optimal configuration of each repeat and calculate row-normalised confusion matrix values.
# Per-repeat frames are gathered in lists and concatenated once at the end, because
# `DataFrame.append` was removed in pandas 2.0.

# Outcome categories as coded in the label columns (the same values the AUROC step binarises on)
# and the display name used for each of them, in matching order.
# Presumably the category coded 3 pools GOSE 2 and 3, as its display name suggests -- verify.
GOSE_CLASSES = [1, 3, 4, 5, 6, 7, 8]
GOSE_CLASS_NAMES = ['GOSE: 1','GOSE: 2 or 3','GOSE: 4','GOSE: 5','GOSE: 6','GOSE: 7','GOSE: 8']


def _long_normalised_cm(true_labels, pred_labels, repeat_name, config_col, config_value):
    """Return one repeat's row-normalised confusion matrix in long format.

    Parameters
    ----------
    true_labels, pred_labels : array-like
        True and predicted outcome categories of the selected predictions.
    repeat_name : str
        Value stored in the 'repeat.name' column of the result.
    config_col : str
        Name of the configuration column to add ('tune_idx' or 'SMOTE').
    config_value : scalar
        Configuration value stored in `config_col`.

    Returns
    -------
    pandas.DataFrame
        One row per (true, predicted) category pair with columns 'true_labels',
        'predicted_labels', 'cm_prob', 'repeat.name' and `config_col`.
    """
    # Passing `labels` pins the matrix to 7 x 7 in a known order. Without it, a category missing
    # from a repeat would shrink the matrix and the fixed list of seven names would no longer fit.
    cm = confusion_matrix(true_labels, pred_labels, labels=GOSE_CLASSES, normalize='true')
    cm = pd.DataFrame(cm, columns=GOSE_CLASS_NAMES).assign(true_labels=GOSE_CLASS_NAMES)
    cm = cm.melt(id_vars=['true_labels'], var_name='predicted_labels', value_name='cm_prob')
    cm['repeat.name'] = repeat_name
    cm[config_col] = config_value
    return cm


def _stack_cm_frames(frames, config_col):
    """Concatenate per-repeat frames, or return an empty frame with the expected header."""
    if not frames:
        return pd.DataFrame(columns=['true_labels', 'predicted_labels', 'cm_prob', 'repeat.name', config_col])
    return pd.concat(frames, ignore_index=True)


deepMN_cm_frames = []
deepOR_cm_frames = []
mnlr_cm_frames = []
polr_cm_frames = []

for curr_repeat_idx in compiled_deepMN_results['repeat.name'].unique():
    # The deep-model frames index repeats numerically; the optimal-AUROC tables and the
    # MNLR/POLR frames use names such as 'Repeat01'
    curr_repeat_name = 'Repeat' + str(int(curr_repeat_idx)).zfill(2)

    # DeepMN: `.values[0]` takes the first listed configuration if several tie for the best AUROC
    curr_opt_deepMN_tune_idx = opt_deepMN_AUROCs.tune_idx[opt_deepMN_AUROCs['repeat.name'] == curr_repeat_name].values[0]
    curr_deepMN_results = compiled_deepMN_results[(compiled_deepMN_results.tune_idx == curr_opt_deepMN_tune_idx) &
                                                  (compiled_deepMN_results['repeat.name'] == curr_repeat_idx)]
    deepMN_cm_frames.append(_long_normalised_cm(curr_deepMN_results.true_labels,
                                                curr_deepMN_results.pred_labels,
                                                curr_repeat_name, 'tune_idx', curr_opt_deepMN_tune_idx))

    # DeepOR
    curr_opt_deepOR_tune_idx = opt_deepOR_AUROCs.tune_idx[opt_deepOR_AUROCs['repeat.name'] == curr_repeat_name].values[0]
    curr_deepOR_results = compiled_deepOR_results[(compiled_deepOR_results.tune_idx == curr_opt_deepOR_tune_idx) &
                                                  (compiled_deepOR_results['repeat.name'] == curr_repeat_idx)]
    deepOR_cm_frames.append(_long_normalised_cm(curr_deepOR_results.true_labels,
                                                curr_deepOR_results.pred_labels,
                                                curr_repeat_name, 'tune_idx', curr_opt_deepOR_tune_idx))

    # MNLR: the configuration is the SMOTE flag rather than a tuning index
    curr_opt_mnlr_tune_idx = opt_mnlr_AUROCs.SMOTE[opt_mnlr_AUROCs['repeat.name'] == curr_repeat_name].values[0]
    curr_mnlr_results = compiled_mnlr_results[(compiled_mnlr_results.SMOTE == int(curr_opt_mnlr_tune_idx)) &
                                              (compiled_mnlr_results['repeat.name'] == curr_repeat_name)]
    mnlr_cm_frames.append(_long_normalised_cm(curr_mnlr_results['true.labels'],
                                              curr_mnlr_results['pred.labels'],
                                              curr_repeat_name, 'SMOTE', curr_opt_mnlr_tune_idx))

    # POLR
    curr_opt_polr_tune_idx = opt_polr_AUROCs.SMOTE[opt_polr_AUROCs['repeat.name'] == curr_repeat_name].values[0]
    curr_polr_results = compiled_polr_results[(compiled_polr_results.SMOTE == int(curr_opt_polr_tune_idx)) &
                                              (compiled_polr_results['repeat.name'] == curr_repeat_name)]
    polr_cm_frames.append(_long_normalised_cm(curr_polr_results['true.labels'],
                                              curr_polr_results['pred.labels'],
                                              curr_repeat_name, 'SMOTE', curr_opt_polr_tune_idx))

# NOTE(review): the MNLR/POLR accumulators used to be declared with a 'tune_idx' column although their
# rows carry 'SMOTE', which left an all-empty 'tune_idx' column in the exported files. That stray column
# is no longer written -- confirm nothing downstream selects it by name.
deepMN_CMs = _stack_cm_frames(deepMN_cm_frames, 'tune_idx')
deepOR_CMs = _stack_cm_frames(deepOR_cm_frames, 'tune_idx')
mnlr_CMs = _stack_cm_frames(mnlr_cm_frames, 'SMOTE')
polr_CMs = _stack_cm_frames(polr_cm_frames, 'SMOTE')

# Save the confusion matrix values of every model type
deepMN_CMs.to_csv('../metrics/deepMN_confusionMatrices.csv', index=False)
deepOR_CMs.to_csv('../metrics/deepOR_confusionMatrices.csv', index=False)
mnlr_CMs.to_csv('../metrics/mnlr_confusionMatrices.csv', index=False)
polr_CMs.to_csv('../metrics/polr_confusionMatrices.csv', index=False)