This file is used to evaluate the performance of Originator on simulated data

In [1]:
# This file is used to evaluate the performance of the model using the following metrics:
# 1) AUCROC (macro), 2) AUCROC (micro), 3) AUCPR, 4) F1 score, 5) MCC, and 6) ARI
# Input data:
# 1) A CSV file containing the predictions (read below from column index 5) and the truth labels (read below from column index 6)

from traceback import print_tb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn import metrics
import matplotlib.pyplot as plt

import tensorflow as tf
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [4]:
# Root directory that holds the per-cell-type / per-seed result folders.
base_input_path = (
    "/nfs/dcmb-lgarmire/thatchau/originator_GB_revision/results/simulatedData_seeds/"
)

# Seed values; the result paths used further down contain one of these
# as a sub-directory (e.g. ".../b/894/...").
seeds = [
    0,
    42,
    64,
    123,
    894,
]

# Cell-type folder names as they appear in the result paths.
celltypes = [
    "tcell",
    "mono",
    "b",
]

# Cell-type labels as they appear inside the prediction file names
# (e.g. "prediction_TB_B-cell_TBannotation.csv"); same order as `celltypes`.
unified_celltypes = [
    "T-cell",
    "Monocyte",
    "B-cell",
]

##### Evaluate results (change file name manually)

In [86]:
# Prediction file to evaluate (edit this path by hand to score another run).
path_file_predictions = (
    "/nfs/dcmb-lgarmire/thatchau/originator_GB_revision/results/simulatedData_seeds/b/894/prediction_TB_B-cell_TBannotation.csv"
)
df_file_predictions = pd.read_csv(path_file_predictions)

# Columns are selected by position: index 5 holds the predictions and
# index 6 holds the truth labels.
list_predictions = df_file_predictions.iloc[:, 5].tolist()
list_truth_labels = df_file_predictions.iloc[:, 6].tolist()

# One-hot encode both label lists over two classes; these arrays feed the
# AUC / AUCPR / F1 computations.
list_predictions_one_hot = tf.keras.utils.to_categorical(
    list_predictions,
    num_classes=2,
)
list_truth_labels_one_hot = tf.keras.utils.to_categorical(
    list_truth_labels,
    num_classes=2,
)

In [87]:
# NOTE(review): the one-hot prediction array passed to the AUC / AUCPR calls
# appears to be built from hard class labels rather than continuous scores,
# so these values are computed at a single operating point - confirm this
# is the intended definition.

# Metrics computed directly on the flat label lists.
# MCC
mcc = matthews_corrcoef(y_true=list_truth_labels, y_pred=list_predictions)
# ARI
ari = adjusted_rand_score(labels_true=list_truth_labels, labels_pred=list_predictions)

# Metrics computed on the one-hot encoded arrays.
# F1 score (micro-averaged)
f1 = f1_score(
    y_true=list_truth_labels_one_hot,
    y_pred=list_predictions_one_hot,
    average="micro",
)
# AUCPR (micro-averaged)
auc_pr = average_precision_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="micro",
)
# AUC (micro)
auc_micro = roc_auc_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="micro",
)
# AUC (macro)
auc_macro = roc_auc_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="macro",
)

In [88]:
# Report every metric on its own line, in a fixed order.
for metric_label, metric_value in (
    ("auc_macro: ", auc_macro),
    ("auc_micro: ", auc_micro),
    ("auc_pr: ", auc_pr),
    ("f1: ", f1),
    ("MCC: ", mcc),
    ("ARI: ", ari),
):
    print(metric_label, metric_value)

auc_macro:  0.9983361064891847
auc_micro:  0.9971202303815695
auc_pr:  0.9956886386454096
f1:  0.9971202303815695
MCC:  0.987825640726239
ARI:  0.9840291576953247


Calculate the overall performance across cell types for the same seed

In [109]:
# Load the prediction tables of the three cell types for seed 894.
# B-cell
Bcell_predictions = pd.read_csv(
    "/nfs/dcmb-lgarmire/thatchau/originator_GB_revision/results/simulatedData_seeds/b/894/prediction_TB_B-cell_TBannotation.csv"
)
# T-cell
Tcell_predictions = pd.read_csv(
    "/nfs/dcmb-lgarmire/thatchau/originator_GB_revision/results/simulatedData_seeds/tcell/894/prediction_TB_T-cell_TBannotation.csv"
)
# Monocyte
mono_predictions = pd.read_csv(
    "/nfs/dcmb-lgarmire/thatchau/originator_GB_revision/results/simulatedData_seeds/mono/894/prediction_TB_Monocyte_TBannotation.csv"
)

In [110]:
# Stack the three per-cell-type prediction tables row-wise, in the order
# B-cell, T-cell, Monocyte.
# `DataFrame.append` is deprecated (pandas 1.4) and removed in pandas 2.0;
# `pd.concat` with default arguments produces the same frame: original row
# indices are kept (not reset) and columns are aligned by name.
df_concat = pd.concat([Bcell_predictions, Tcell_predictions, mono_predictions])

  df_concat = Bcell_predictions.append(Tcell_predictions)
  df_concat = df_concat.append(mono_predictions)


In [111]:
# Columns are selected by position from the combined table: index 5 holds
# the predictions and index 6 holds the truth labels.
list_predictions = df_concat.iloc[:, 5].tolist()
list_truth_labels = df_concat.iloc[:, 6].tolist()

# One-hot encode both label lists over two classes.
list_predictions_one_hot = tf.keras.utils.to_categorical(
    list_predictions,
    num_classes=2,
)
list_truth_labels_one_hot = tf.keras.utils.to_categorical(
    list_truth_labels,
    num_classes=2,
)

# Metrics computed directly on the flat label lists.
# MCC
mcc = matthews_corrcoef(y_true=list_truth_labels, y_pred=list_predictions)
# ARI
ari = adjusted_rand_score(labels_true=list_truth_labels, labels_pred=list_predictions)

# Metrics computed on the one-hot encoded arrays.
# F1 score (micro-averaged)
f1 = f1_score(
    y_true=list_truth_labels_one_hot,
    y_pred=list_predictions_one_hot,
    average="micro",
)
# AUCPR (micro-averaged)
auc_pr = average_precision_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="micro",
)
# AUC (micro)
auc_micro = roc_auc_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="micro",
)
# AUC (macro)
auc_macro = roc_auc_score(
    y_true=list_truth_labels_one_hot,
    y_score=list_predictions_one_hot,
    average="macro",
)

In [112]:
# Report the combined-table metrics, one per line, in a fixed order.
for metric_label, metric_value in {
    "auc_macro: ": auc_macro,
    "auc_micro: ": auc_micro,
    "auc_pr: ": auc_pr,
    "f1: ": f1,
    "MCC: ": mcc,
    "ARI: ": ari,
}.items():
    print(metric_label, metric_value)

auc_macro:  0.9550681528909919
auc_micro:  0.9689698492462311
auc_pr:  0.9544176441251484
f1:  0.9689698492462312
MCC:  0.9035755776808341
ARI:  0.8621752054827151
