In [1]:
import pickle
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import label_binarize
import numpy as np

class dls:
    """Lightweight namespace that only carries the class-name vocabulary."""

    # Class names; later cells use them to build the 'test_f1_<name>' column names.
    vocab = [
        'NM',
        'EL',
        'MD',
        'SV',
    ]


dls.vocab

['NM', 'EL', 'MD', 'SV']

In [3]:
# Load the pickled pair of result tables (random-split results, subject-split results).
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
save_path = 'optuna_bests.pkl'
with open(save_path, 'rb') as fh:
    results_rand, results_subj = pickle.load(fh)

# List the fields stored for each row of the random-split results.
results_rand.columns

Index(['arch', 'hyperparams', 'total params', 'fold_no', 'train loss',
       'valid loss', 'accuracy', 'roc_score', 'time', 'test_acc', 'test_f1_EL',
       'test_f1_MD', 'test_f1_NM', 'test_f1_SV', 'test_auc_scr',
       'train_splits', 'test_splits', 'prob_scores', 'predictions', 'y_test',
       'test_f1_weighted'],
      dtype='object')

In [4]:
# Mean and standard deviation over the rows of results_rand for the ROC score,
# test accuracy, weighted F1 and the per-class F1 columns (one per dls.vocab entry),
# rounded to three decimals. Rows of the table are metrics; columns are 'mean'/'std'.
(
    results_rand[['roc_score', 'test_acc', 'test_f1_weighted',
                  *['test_f1_' + name for name in dls.vocab[:4]]]]
    .agg(['mean', 'std'])
    .T
    .round(3)
)

Unnamed: 0,mean,std
roc_score,0.986,0.007
test_acc,0.908,0.023
test_f1_weighted,0.907,0.023
test_f1_NM,0.942,0.027
test_f1_EL,0.844,0.06
test_f1_MD,0.85,0.03
test_f1_SV,0.966,0.011


In [6]:
# Mean and standard deviation over the rows of results_subj for the ROC score,
# test accuracy, weighted F1 and the per-class F1 columns (one per dls.vocab entry),
# rounded to three decimals. Rows of the table are metrics; columns are 'mean'/'std'.
(
    results_subj[['roc_score', 'test_acc', 'test_f1_weighted',
                  *['test_f1_' + name for name in dls.vocab[:4]]]]
    .agg(['mean', 'std'])
    .T
    .round(3)
)

Unnamed: 0,mean,std
roc_score,0.933,0.026
test_acc,0.762,0.057
test_f1_weighted,0.764,0.058
test_f1_NM,0.873,0.07
test_f1_EL,0.619,0.085
test_f1_MD,0.634,0.123
test_f1_SV,0.871,0.065


In [9]:
# Show the 'test_auc_scr' and 'roc_score' columns side by side for a row-by-row comparison.
results_rand.loc[:, ['test_auc_scr', 'roc_score']]

Unnamed: 0,test_auc_scr,roc_score
0,0.986346,0.986346
1,0.979788,0.979788
2,0.985482,0.985482
3,0.996293,0.996293
4,0.980752,0.980752


In [5]:
# Pool the per-fold predictions and labels into flat 1-D arrays. Each fold is
# flattened and concatenated on its own, so folds of different lengths are handled;
# stacking all folds into one 2-D array first only works when every fold has the
# same number of samples.
predictions = np.concatenate([np.ravel(fold) for fold in results_rand['predictions']])
y_test = np.concatenate([np.ravel(fold) for fold in results_rand['y_test']])

# Metrics computed on the pooled samples (not averaged over folds).
# NOTE(review): the per-class F1 array is in label-index order 0..3; confirm which
# class name each index maps to before comparing with the 'test_f1_<name>' columns.
metrics = {}
metrics['test_acc'] = accuracy_score(y_test, predictions)
metrics['test_f1_weighted'] = f1_score(y_test, predictions, labels=[0,1,2,3], average='weighted')
metrics['test_f1'] = f1_score(y_test, predictions, labels=[0,1,2,3], average=None)
metrics

{'test_acc': 0.9075862068965517,
 'test_f1_weighted': 0.906904046461367,
 'test_f1': array([0.84415584, 0.8502994 , 0.94182825, 0.96644295])}

In [77]:
# Pool the per-fold predictions and labels of the subject-based split into single
# arrays. np.concatenate joins the folds along the first axis, which also works
# when the folds have different sizes, and avoids list comprehensions run only
# for their side effects.
predictions = np.concatenate(results_subj['predictions'].tolist())
y_test = np.concatenate(results_subj['y_test'].tolist())

# Metrics computed on the pooled samples (not averaged over folds).
# NOTE(review): the per-class F1 array is in label-index order 0..3; confirm which
# class name each index maps to before comparing with the 'test_f1_<name>' columns.
metrics = {}
metrics['test_acc'] = accuracy_score(y_test, predictions)
metrics['test_f1_weighted'] = f1_score(y_test, predictions, labels=[0,1,2,3], average='weighted')
metrics['test_f1'] = f1_score(y_test, predictions, labels=[0,1,2,3], average=None)
metrics

{'test_acc': 0.7655172413793103,
 'test_f1_weighted': 0.7676498204680301,
 'test_f1': array([0.62666667, 0.64109589, 0.86908078, 0.88262911])}

In [87]:
from collections import Counter
# Print the class distribution of the test labels for the first five folds of
# each splitting scheme.
for title, frame in (('random split value counts', results_rand),
                     ('subject based split value counts', results_subj)):
    print(title)
    fold_labels = frame['y_test'].values
    for i in range(5):
        print(f'fold {i}: ', Counter(fold_labels[i]))

random split value counts
fold 0:  Counter({3: 44, 2: 36, 1: 34, 0: 31})
fold 1:  Counter({3: 44, 2: 36, 1: 35, 0: 30})
fold 2:  Counter({3: 44, 2: 35, 1: 35, 0: 31})
fold 3:  Counter({3: 45, 2: 35, 1: 34, 0: 31})
fold 4:  Counter({3: 45, 2: 35, 1: 34, 0: 31})
subject based split value counts
fold 0:  Counter({3: 52, 2: 41, 1: 38, 0: 33})
fold 1:  Counter({2: 38, 3: 37, 1: 36, 0: 25})
fold 2:  Counter({1: 42, 2: 38, 3: 32, 0: 31})
fold 3:  Counter({3: 45, 2: 31, 0: 31, 1: 27})
fold 4:  Counter({3: 56, 0: 34, 2: 29, 1: 29})


In [73]:
from sklearn.metrics import roc_auc_score, precision_score, confusion_matrix
import numpy as np

def compute_additional_metrics(y_true, y_pred, y_prob=None, average='weighted'):
    """
    Compute AUC, PPV (Precision), and NPV for multiclass classification results.

    Parameters:
    - y_true: array-like of shape (n_samples,) - Ground truth labels.
    - y_pred: array-like of shape (n_samples,) - Predicted labels.
    - y_prob: array-like of shape (n_samples, n_classes) - Predicted probabilities for each class.
              Required for AUC calculation; when omitted, AUC is reported as None.
    - average: str - Averaging method passed to the AUC and PPV computations
               (e.g., 'weighted', 'macro'). NPV does not use it: NPV is always
               the support-weighted mean of the per-class values.

    Returns:
    - metrics: dict - Dictionary with keys 'AUC', 'PPV' and 'NPV'. 'AUC' is None
      when y_prob is not given, or when roc_auc_score raises ValueError for the
      given inputs; in the latter case a RuntimeWarning carrying the reason is
      emitted instead of dropping it silently.
    """
    import warnings  # local import so the function does not depend on another cell's imports

    metrics = {'AUC': None}

    # AUC for multiclass (One-vs-Rest). Kept best-effort: a failure yields None,
    # but the cause is surfaced so a missing AUC can be diagnosed.
    if y_prob is not None:
        try:
            metrics['AUC'] = roc_auc_score(y_true, y_prob, multi_class='ovr', average=average)
        except ValueError as err:
            warnings.warn(
                f'AUC could not be computed and is reported as None: {err}',
                RuntimeWarning,
                stacklevel=2,
            )

    # PPV (Precision)
    metrics['PPV'] = precision_score(y_true, y_pred, average=average, zero_division=0)

    # NPV: one-vs-rest counts per class, read off the confusion matrix
    # (rows index the true class, columns the predicted class).
    cm = confusion_matrix(y_true, y_pred)
    tp = np.diag(cm)
    support = cm.sum(axis=1)          # number of true samples in each class
    fn = support - tp
    fp = cm.sum(axis=0) - tp
    tn = cm.sum() - (tp + fp + fn)

    # Per-class NPV = TN / (TN + FN); defined as 0 where the denominator is 0.
    negatives = tn + fn
    npv_per_class = np.divide(
        tn,
        negatives,
        out=np.zeros(negatives.shape, dtype=float),
        where=negatives > 0,
    )

    # Average the per-class NPV values, weighted by class support.
    metrics['NPV'] = np.average(npv_per_class, weights=support)

    return metrics


In [85]:
# Pool predictions, labels and per-class probability scores across all folds of
# the random split. np.concatenate joins the folds along the first axis and
# replaces the list comprehensions that were run only for their side effects.
predictions = np.concatenate(results_rand['predictions'].tolist())
y_test = np.concatenate(results_rand['y_test'].tolist())
prob_scores = np.concatenate(results_rand['prob_scores'].tolist())

# AUC / PPV / NPV on the pooled samples.
metrics = compute_additional_metrics(y_test, predictions, prob_scores)
print('random split')
print(metrics)

random split
{'AUC': 0.9872667723169826, 'PPV': 0.9067835045440245, 'NPV': 0.9715969041806334}


In [86]:
# Pool predictions, labels and per-class probability scores across all folds of
# the subject-based split. np.concatenate joins the folds along the first axis
# (fold sizes may differ) and replaces the list comprehensions that were run
# only for their side effects.
predictions = np.concatenate(results_subj['predictions'].tolist())
y_test = np.concatenate(results_subj['y_test'].tolist())
prob_scores = np.concatenate(results_subj['prob_scores'].tolist())

# AUC / PPV / NPV on the pooled samples.
metrics = compute_additional_metrics(y_test, predictions, prob_scores)
print('subject based split')
print(metrics)

subject based split
{'AUC': 0.9383254632754701, 'PPV': 0.7720312220575497, 'NPV': 0.9240367834900114}
