In [14]:
import numpy as np
from scipy.signal import medfilt
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score

In [20]:
def calibrate_models(models, X_val, y_val, method='isotonic', cv=5):
    """
    Calibrate each pre-trained classifier on (X_val, y_val).
    """
    calibrated = {}
    for name, clf in models.items():
        calibrator = CalibratedClassifierCV(clf,method=method, cv=cv)
        calibrated[name] = calibrator.fit(X_val, y_val)
    return calibrated


In [21]:
def ensemble_probas(calibrated_models, X_test):
    """
    Soft-voting ensemble: average probability outputs.
    
    Returns ensembled probabilities and the class labels.
    """
    probas = [clf.predict_proba(X_test) for clf in calibrated_models.values()]
    proba_ens = np.mean(probas, axis=0)
    classes = list(calibrated_models.values())[0].classes_
    return proba_ens, classes


In [22]:
def get_final_labels(proba_ens, classes, smoothing=True):
    """
    Convert ensembled probabilities to discrete labels.
    
    Applies argmax + optional median smoothing.
    """
    idx = np.argmax(proba_ens, axis=1)
    y_pred = np.array(classes)[idx]
    if smoothing:
        y_pred = medfilt(y_pred, kernel_size=3)
    return y_pred

In [23]:
def evaluate_predictions(y_true, y_pred, labels, target_names):
    """
    Compute and print confusion matrix, classification report, and F1 scores.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred,labels=labels, target_names=target_names, digits=3))
    f1_mac = f1_score(y_true, y_pred, average='macro', labels=labels)
    f1_mic = f1_score(y_true, y_pred, average='micro')
    print(f"Macro-averaged F1: {f1_mac:.3f}")
    print(f"Micro-averaged F1: {f1_mic:.3f}")

In [None]:
if __name__ == "__main__":
   #dummy data
    X, y = make_classification(
        n_samples=500,
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_classes=5,
        n_clusters_per_class=1,
        random_state=42
    )
    y = np.where(y == 4, 5, y)
    
    X_train, X_temp, y_train, y_temp = train_test_split( X, y, test_size=0.4, stratify=y, random_state=0 )
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=0 )

    models = {
        'svm': SVC(probability=True, random_state=0),
        'rf':  RandomForestClassifier(n_estimators=100, random_state=0),
        'lr':  LogisticRegression(max_iter=1000, random_state=0)
    }
    for clf in models.values():
        clf.fit(X_train, y_train)

    #enhancement
    calibrated = calibrate_models(models, X_val, y_val,method='isotonic', cv=5)
    proba_ens, classes = ensemble_probas(calibrated, X_test)
    y_pred = get_final_labels(proba_ens, classes, smoothing=True)

    #Evaluate
    labels       = [0,1,2,3,5]
    target_names = ['Wake','N1','N2','N3','REM']
    evaluate_predictions(y_test, y_pred, labels, target_names)


Confusion Matrix (rows=true, cols=pred):
[[ 8  0  5  6  1]
 [ 2  6  5  7  0]
 [ 2  1 10  5  2]
 [ 0  3  2 12  3]
 [ 4  2  7  4  3]]

Classification Report:
              precision    recall  f1-score   support

        Wake      0.500     0.400     0.444        20
          N1      0.500     0.300     0.375        20
          N2      0.345     0.500     0.408        20
          N3      0.353     0.600     0.444        20
         REM      0.333     0.150     0.207        20

    accuracy                          0.390       100
   macro avg      0.406     0.390     0.376       100
weighted avg      0.406     0.390     0.376       100

Macro-averaged F1: 0.376
Micro-averaged F1: 0.390
