In [34]:
# Cell 1: Import Libraries

import argparse
import os
import re
import scipy.io as sio
import numpy as np
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix, f1_score


In [35]:
# Cell 2: Define Constants

# Class names; a name's position in this list is the integer label it stands for.
classes = ['W', 'N1', 'N2', 'N3', 'REM']
n_classes = len(classes)

# Named integer labels (0..4), in the same order as `classes`.
W, N1, N2, N3, REM = range(5)


In [36]:
# Cell 3: Define Functions

def evaluate_metrics(cm):
    """Print a confusion-matrix summary and return one-vs-rest metrics per class.

    Rows of ``cm`` are treated as true classes and columns as predicted
    classes: false negatives are taken from row sums and false positives from
    column sums.

    Any ratio whose denominator is zero (for example the recall of a class
    that has no true samples) is reported as 0.0 instead of NaN, so a single
    empty class does not turn the macro averages into NaN.

    Args:
        cm: Square 2-D array of counts, one row/column per class.

    Returns:
        Tuple ``(ACC_macro, ACC, F1_macro, F1, TPR, TNR, PPV)``. ``ACC``,
        ``F1``, ``TPR`` (recall), ``TNR`` (specificity) and ``PPV``
        (precision) are float32 arrays with one entry per class;
        ``ACC_macro`` and ``F1_macro`` are their unweighted means.
    """
    cm = np.asarray(cm)
    print("Confusion matrix:")
    print(cm)

    def _ratio(numerator, denominator):
        # Element-wise division that leaves 0 wherever the denominator is 0,
        # instead of emitting a RuntimeWarning and producing NaN/inf.
        return np.divide(numerator, denominator,
                         out=np.zeros_like(numerator),
                         where=denominator != 0)

    cm = cm.astype(np.float32)
    TP = np.diag(cm)
    FP = cm.sum(axis=0) - TP
    FN = cm.sum(axis=1) - TP
    TN = cm.sum() - (FP + FN + TP)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = _ratio(TP, TP + FN)
    # Specificity or true negative rate
    TNR = _ratio(TN, TN + FP)
    # Precision or positive predictive value
    PPV = _ratio(TP, TP + FP)

    # Per-class (one-vs-rest) accuracy; its plain mean is the macro average,
    # which gives small classes the same weight as large ones.
    ACC = _ratio(TP + TN, TP + FP + FN + TN)
    ACC_macro = np.mean(ACC)

    F1 = _ratio(2 * PPV * TPR, PPV + TPR)
    F1_macro = np.mean(F1)

    print("Sample: {}".format(int(np.sum(cm))))
    # Iterate over the matrix's own size so a matrix with fewer (or more)
    # classes than the module-level `classes` list is still reported in full.
    support = TP + FN
    for index_ in range(cm.shape[0]):
        name = classes[index_] if index_ < len(classes) else str(index_)
        print("{}: {}".format(name, int(support[index_])))

    return ACC_macro, ACC, F1_macro, F1, TPR, TNR, PPV


In [37]:
# Cell 4: Define Print Performance Function

def print_performance(cm, y_true=None, y_pred=None):
    """Print per-class and overall metrics derived from a confusion matrix.

    Rows of ``cm`` are treated as true classes and columns as predicted
    classes (recall divides by row sums, precision by column sums). The
    per-class sample counts are printed through the module-level indices
    ``W``, ``N1``, ``N2``, ``N3`` and ``REM``, so ``cm`` must have a row for
    each of those indices.

    Ratios with a zero denominator (e.g. the precision of a class that was
    never predicted) are reported as 0.0 rather than NaN, which keeps the
    macro-F1 finite when a class is missing.

    Args:
        cm: Square 2-D array of counts.
        y_true: Optional true labels. When given and non-empty, the overall
            accuracy is computed directly from the labels and Cohen's kappa
            is printed as well; otherwise accuracy is derived from ``cm``.
        y_pred: Optional predicted labels, aligned element-wise with
            ``y_true``.

    Returns:
        None. All results are written to stdout.
    """
    def _ratio(numerator, denominator):
        # Element-wise division that leaves 0 wherever the denominator is 0.
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        return np.divide(numerator, denominator,
                         out=np.zeros_like(numerator),
                         where=denominator != 0)

    cm = np.asarray(cm)
    total = cm.sum()
    tp = np.diagonal(cm).astype(float)
    tpfp = cm.sum(axis=0).astype(float)  # column sums: everything predicted as the class
    tpfn = cm.sum(axis=1).astype(float)  # row sums: everything that truly is the class
    fp = tpfp - tp
    fn = tpfn - tp
    tn = float(total) - (fp + fn + tp)

    acc = tp.sum() / total if total else 0.0
    precision = _ratio(tp, tpfp)
    recall = _ratio(tp, tpfn)
    f1 = _ratio(2 * precision * recall, precision + recall)
    specificity = _ratio(tn, tn + fp)  # TNR
    mf1 = np.mean(f1)

    print("Sample: {}".format(total))
    print("W: {}".format(tpfn[W]))
    print("N1: {}".format(tpfn[N1]))
    print("N2: {}".format(tpfn[N2]))
    print("N3: {}".format(tpfn[N3]))
    print("REM: {}".format(tpfn[REM]))
    print("Confusion matrix:")
    print(cm)
    print("Precision(PPV): {}".format(precision))
    print("Recall(Sensitivity): {}".format(recall))
    print("Specificity: {}".format(specificity))
    print("F1: {}".format(f1))
    if y_true is not None and len(y_true) > 0:
        # Convert to arrays first: comparing two plain lists with == yields a
        # single bool, which would make the "accuracy" either 0.0 or 1.0.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        print("Overall accuracy: {}".format(np.mean(y_true == y_pred)))
        print("Cohen's kappa score: {}".format(cohen_kappa_score(y_true, y_pred)))
    else:
        print("Overall accuracy: {}".format(acc))
    print("Macro-F1 accuracy: {}".format(mf1))


In [38]:
# Cell 5: Define Performance Overall Function

def perf_overall(data_dir):
    """Print per-file and pooled classification performance for a results directory.

    Every file in ``data_dir`` whose name matches ``output_<...><digits>.npz``
    is loaded and its ``y_true`` / ``y_pred`` arrays are scored with
    ``print_performance``. The labels of all files are then pooled, written to
    ``con_matrix_sleep.mat`` in the current working directory, and scored once
    more (with overall accuracy and Cohen's kappa) under the heading "Ours:".

    Args:
        data_dir: Directory containing the ``output_*.npz`` files.

    Returns:
        None. Results are printed; the pooled labels are saved as a side
        effect. If no matching file exists, a message is printed and nothing
        is written.
    """
    def _natural_key(path):
        # Split the file name into alternating text / digit runs and compare
        # the digit runs as integers, so "fold10" sorts after "fold2".
        tokens = re.split(r"(\d+)", os.path.basename(path))
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    # Keep only the output files (the name must END in .npz) and sort them in
    # ascending numeric order.
    name_pattern = re.compile(r"^output_.+\d+\.npz$")
    outputfiles = sorted(
        (os.path.join(data_dir, name)
         for name in os.listdir(data_dir)
         if name_pattern.match(name)),
        key=_natural_key,
    )
    if not outputfiles:
        print("No output_*.npz files found in {}".format(data_dir))
        return

    label_ids = list(range(n_classes))
    true_chunks = []
    pred_chunks = []
    for fpath in outputfiles:
        with np.load(fpath) as f:
            # Each lookup on the archive re-reads the array from disk, so
            # fetch each array exactly once.
            file_true = f["y_true"]
            file_pred = f["y_pred"]

        print(file_true.shape)
        if file_true.ndim == 1:
            if len(file_true) < 10:
                # NOTE(review): presumably a short 1-D array here holds a few
                # per-sequence arrays that must be joined — confirm against
                # the code that writes these files.
                f_y_true = np.hstack(file_true)
                f_y_pred = np.hstack(file_pred)
            else:
                f_y_true = file_true
                f_y_pred = file_pred
        else:
            f_y_true = file_true.flatten()
            f_y_pred = file_pred.flatten()

        true_chunks.append(f_y_true)
        pred_chunks.append(f_y_pred)

        print("File: {}".format(fpath))
        cm = confusion_matrix(f_y_true, f_y_pred, labels=label_ids)
        print_performance(cm)
    print(" ")

    y_true = np.concatenate(true_chunks)
    y_pred = np.concatenate(pred_chunks)
    sio.savemat('con_matrix_sleep.mat', {'y_true': y_true, 'y_pred': y_pred})
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    print("Ours:")
    print_performance(cm, y_true, y_pred)


In [39]:
# Cell 6: Run the Evaluation

# A notebook has no command line, so instead of parsing arguments the results
# directory is assigned here and handed straight to perf_overall.

# Set your data directory here
data_dir = "./outputs_2013/outputs_eeg_fpz_cz"
perf_overall(data_dir=data_dir)


(1800,)
File: ./outputs_2013/outputs_eeg_fpz_cz\output_fold0.npz
Sample: 1800
W: 341.0
N1: 102.0
N2: 587.0
N3: 476.0
REM: 294.0
Confusion matrix:
[[330   5   0   1   5]
 [ 29  34  15   1  23]
 [  3   4 529  41  10]
 [  1   0  27 448   0]
 [  1   2  15   0 276]]
Precision(PPV): [0.90659341 0.75555556 0.90273038 0.91242363 0.87898089]
Recall(Sensitivity): [0.96774194 0.33333333 0.9011925  0.94117647 0.93877551]
Specificity: [0.97669637 0.99352179 0.95300907 0.96752266 0.9747676 ]
F1: [0.93617021 0.46258503 0.90196078 0.92657704 0.90789474]
Overall accuracy: 0.8983333333333333
Macro-F1 accuracy: 0.8270375620669131
(2200,)
File: ./outputs_2013/outputs_eeg_fpz_cz\output_fold1.npz
Sample: 2200
W: 299.0
N1: 193.0
N2: 1197.0
N3: 193.0
REM: 318.0
Confusion matrix:
[[ 259   24    0    3   13]
 [   1  122   16    0   54]
 [   3   43 1058   52   41]
 [   0    0    8  185    0]
 [   0    0    3    0  315]]
Precision(PPV): [0.98479087 0.64550265 0.97511521 0.77083333 0.74468085]
Recall(Sensitivity):