In [None]:
import matplotlib.pyplot as plt 
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Collapse the per-class probability columns of the CNN output into a single
# vector of integer class indices (0, 1, 2, ...): for every sample keep the
# position of the largest value along the last axis.
# NOTE(review): y_pred is rebound to the index vector here, so re-running this
# cell without regenerating the predictions would argmax the indices again.
y_pred = np.argmax(y_pred, axis=-1)

# Same conversion for the validation targets, done on a copy and stored under
# a new name so that y_val itself is left as it was.
y_val2 = np.argmax(y_val.copy(), axis=-1)

# Display names of the classes, one per curve in the ROC plot.
# NOTE(review): presumably listed in the same order as the class indices
# produced above -- confirm against the label encoding used for training.
target = ['Ill', 'VMIll', 'MIll', 'MoIll']

# Single axes on a 12x8 inch figure; the ROC curves are drawn onto c_ax.
fig, c_ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))

def multiclass_roc_auc_score(y_val2, y_pred, average="macro"):
    """Plot a per-class ROC curve and return the averaged ROC AUC score.

    Both label vectors are binarised with a ``LabelBinarizer`` fitted on
    ``y_val2``, giving one indicator column per class. For every name in the
    module-level ``target`` list the ROC curve of the matching column is
    drawn on the module-level axes ``c_ax``, followed by the diagonal that
    represents random guessing.

    Because ``y_pred`` is binarised as well, the scores handed to
    ``roc_curve`` are 0/1 indicators rather than probabilities, so each curve
    consists of a single operating point joined to (0, 0) and (1, 1).

    Args:
        y_val2: 1-D array-like of true class labels.
        y_pred: 1-D array-like of predicted class labels.
        average: Averaging mode forwarded to ``roc_auc_score``. The default
            ``"macro"`` is the unweighted mean of the per-class AUC values.

    Returns:
        The ROC AUC score computed by ``roc_auc_score`` on the binarised
        labels.

    Note:
        Column ``idx`` of the binarised arrays is paired with ``target[idx]``.
        NOTE(review): this assumes ``y_val2`` contains exactly
        ``len(target)`` distinct classes and that ``target`` follows the
        sorted label order used by ``LabelBinarizer`` -- confirm.
    """
    lb = LabelBinarizer()
    lb.fit(y_val2)
    y_true_bin = lb.transform(y_val2)
    y_pred_bin = lb.transform(y_pred)

    for idx, c_label in enumerate(target):
        # The thresholds returned by roc_curve are not needed for plotting.
        fpr, tpr, _ = roc_curve(y_true_bin[:, idx].astype(int), y_pred_bin[:, idx])
        c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, auc(fpr, tpr)))

    # Draw the chance diagonal from fixed end points instead of re-using the
    # `fpr` left over from the last loop iteration, which is undefined when
    # `target` is empty and NaN when the last class is degenerate.
    c_ax.plot([0, 1], [0, 1], 'b-', label='Random Guessing')
    return roc_auc_score(y_true_bin, y_pred_bin, average=average)

# Evaluate exactly once: every call to multiclass_roc_auc_score draws onto
# c_ax, so calling it a second time would duplicate each curve and each
# legend entry.
print('ROC AUC score:', multiclass_roc_auc_score(y_val2, y_pred))

# Finish the figure: legend built from the curve labels, axis titles, render.
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')
plt.show()