In [None]:
import matplotlib
# `init_done` is only assigned at the bottom of this cell, so (unless it was
# defined elsewhere) this branch is skipped on the first run in a fresh kernel
# and taken whenever the cell is executed again.
# NOTE(review): presumably the first pass is meant to render figures with the
# default backend and a re-run to produce PGF/LaTeX output — confirm.
if 'init_done' in globals():
    # Switch to the PGF backend and let LaTeX (pdflatex) typeset all text.
    matplotlib.use("pgf")
    matplotlib.rcParams.update({
        "pgf.texsystem": "pdflatex",
        'font.family': 'serif',
        'text.usetex': True,
        'pgf.rcfonts': False,
    })
import matplotlib.pyplot as plt

import numpy as np
import pickle
import sklearn.metrics
import pandas as pd

# Marks that this cell has been executed once (checked at the top of the cell).
init_done = True

In [None]:
# NOTE(review): the next three constants are not referenced by the cells of
# this notebook section; presumably they document the training setup — confirm.
RANDOM_STATE = 42
MAX_ROUNDS = 100
PATIENCE = 30

# Group entry that serves as x-axis of the plots and as key of the loaded
# score dictionaries (`load` handles 'n_clients' and 'window_length').
X_KEY = 'window_length'

# One entry per curve in the comparison plots:
#   name           legend label
#   n_clients      client counts to load (part of the score file name)
#   window_length  window lengths to load (part of folder and file name)
#   min_los_icu    value used in the `min<..>h` parts of the score path
#   fl             True -> `scores_fl_` file prefix for more than one client
#   folder_suffix  appended to the `<window_length>h` folder name
GROUPS = [
    dict(
        name='ES: min. loss (fl, 8 clients)',
        n_clients=[8],
        window_length=[8, 16, 24],
        min_los_icu=24,
        fl=True,
        folder_suffix='',
    ),
    dict(
        name='ES: max. F1 (fl, 8 clients)',
        n_clients=[8],
        window_length=[8, 16, 24],
        min_los_icu=24,
        fl=True,
        folder_suffix='_ES-F1',
    ),
]

# Prepare Data

## Load Groups

In [None]:
def load(group, n, l):
    """Load one pickled score file and attach its content to ``group``.

    The file is looked up at
    ``./scores/min<min_los_icu>h/<l>h<folder_suffix>/scores[_fl]_<n>clients_<l>h(min<min_los_icu>h).pickle``;
    the ``_fl`` infix is used when ``group['fl']`` is true and ``n != 1``.

    The file must contain either ``(train, valid, test)`` or
    ``(train, valid, test, predictions)``. The entries are stored in
    ``group['scores']['train' | 'valid' | 'test'][key]`` and, if present,
    ``group['predictions'][key]``, where ``key`` is ``n`` for
    ``X_KEY == 'n_clients'`` and ``l`` for ``X_KEY == 'window_length'``.

    Args:
        group: Group dictionary providing ``min_los_icu``, ``folder_suffix``
            and ``fl``; it is modified in place.
        n: Number of clients (integer, part of the file name).
        l: Window length (integer, appears as ``<l>h`` in the path).

    Raises:
        ValueError: If ``X_KEY`` is not supported or the file does not hold
            three or four entries.
        OSError: If the file cannot be opened.
    """
    # Select key (fail early instead of silently storing everything under None):
    if X_KEY == 'n_clients':
        key = n
    elif X_KEY == 'window_length':
        key = l
    else:
        raise ValueError(f'Unsupported X_KEY {X_KEY!r}; expected "n_clients" or "window_length".')

    # Create data path:
    min_los = group['min_los_icu']
    path =  f'./scores/min{min_los:d}h/{l:d}h{group["folder_suffix"]:s}/'
    path += ('scores_fl_' if group['fl'] and n != 1 else 'scores_')
    path += f'{n:d}clients_{l:d}h(min{min_los:d}h).pickle'

    print(f'Loading file "{path:s}"', end='...')

    # Add subdictionaries to data-tree if necessary:
    scores = group.setdefault('scores', {'train': {}, 'valid': {}, 'test': {}})
    predictions = group.setdefault('predictions', {})

    # Load data (read the file once and decide by the number of entries):
    with open(path, 'rb') as file:
        # NOTE: unpickling can execute arbitrary code; only load score files
        # from a trusted source.
        payload = tuple(pickle.load(file))

    if len(payload) == 4:
        scores['train'][key], scores['valid'][key], scores['test'][key], predictions[key] = payload
    elif len(payload) == 3:
        # Older files without stored predictions.
        scores['train'][key], scores['valid'][key], scores['test'][key] = payload
    else:
        raise ValueError(f'Expected 3 or 4 entries in "{path:s}", found {len(payload):d}.')

    print('Done.')

In [None]:
for group in GROUPS:
    # Bring both value lists into ascending order (sorted in place):
    for list_name in ('n_clients', 'window_length'):
        group[list_name].sort()

    # Load the score file of every (n_clients, window_length) combination:
    for n in group['n_clients']:
        for l in group['window_length']:
            load(group, n, l)

## Recalculate Scores with scikit-learn

In [None]:
from helpers import enumerate_predictions
n_labels = 2
n_fold = 5

def recalculate_scores(group, key):
    """Recompute the test metrics of ``group`` for ``key`` from its stored predictions.

    The entries ``'AUROC'``, ``'AUPRC'``, ``'F1'``, ``'precision'`` and
    ``'recall'`` of ``group['scores']['test'][key]`` are replaced by arrays of
    shape ``(n_fold, n_labels)``. Every value is the average over ``n``
    prediction sets, where ``n`` is 1 for groups with ``group['fl']`` set,
    ``key`` if ``X_KEY == 'n_clients'``, and the last entry of
    ``group['n_clients']`` otherwise.

    Precision and recall are computed on predictions binarised with
    ``np.round``. F1 is derived from them and is NaN when both are zero.

    Args:
        group: Group dictionary with ``'scores'``, ``'predictions'``, ``'fl'``
            and ``'n_clients'``; ``group['scores']['test'][key]`` is modified
            in place.
        key: Key of the score/prediction entry to recalculate.
    """
    test_scores = group['scores']['test'][key]

    # Init scores-arrays:
    for metric in ('AUROC', 'AUPRC', 'F1', 'precision', 'recall'):
        test_scores[metric] = np.zeros((n_fold, n_labels))

    # Calculate actual number of scores:
    if group['fl']:
        n = 1
    elif X_KEY == 'n_clients':
        n = key
    else:
        n = group['n_clients'][-1]

    # Weight of a single prediction set in the per-fold average.
    f = 1. / float(n)

    for fold in range(n_fold):
        for i in range(n):
            # NOTE(review): enumerate_predictions is assumed to yield
            # (truth, prediction) array pairs with one entry per label — confirm.
            y_true = []
            y_pred = []
            for t, p in enumerate_predictions(group['predictions'][key], n_labels=n_labels, client=i, fold=fold):
                y_true.append(t.astype(int))
                y_pred.append(p.astype(float))
            y_true = np.array(y_true)
            y_pred = np.array(y_pred)

            # Calculate classification metrics:
            for label in range(n_labels):
                label_true = y_true[:, label]
                label_pred = y_pred[:, label]
                # Hard decisions for precision/recall (np.round rounds exactly 0.5 to 0).
                label_hard = np.round(label_pred)

                prc_crv, rcl_crv, _ = sklearn.metrics.precision_recall_curve(label_true, label_pred)
                precision = sklearn.metrics.precision_score(label_true, label_hard)
                recall = sklearn.metrics.recall_score(label_true, label_hard)
                auroc = sklearn.metrics.roc_auc_score(label_true, label_pred)
                auprc = sklearn.metrics.auc(rcl_crv, prc_crv)

                # F1 is undefined without any true/predicted positive: store NaN
                # explicitly instead of dividing by zero (the plots use nanmean/nanstd).
                denominator = precision + recall
                f1 = 2 * precision * recall / denominator if denominator > 0 else np.nan

                test_scores['AUROC'][fold, label] += f * auroc
                test_scores['AUPRC'][fold, label] += f * auprc
                test_scores['F1'][fold, label] += f * f1
                test_scores['precision'][fold, label] += f * precision
                test_scores['recall'][fold, label] += f * recall

In [None]:
# Re-score every entry for which predictions are stored in the group.
for group in GROUPS:
    stored_predictions = group['predictions']
    for key in stored_predictions.keys():
        recalculate_scores(group, key)

# Analyze data

In [None]:
# Plot palette; the commented-out entries are currently unused alternatives.
colors = [
    '#f37500',  # (243, 117,   0)
#    '#069a2E', # (  6, 154,  46)
    '#3465a4',  # ( 52, 101, 164)
#    '#780373', # (120,   3, 115)
    '#f10d0c',  # (241,  13,  12)
]

# Fail with an explicit message before touching any group if the palette is too small.
if len(GROUPS) > len(colors):
    raise IndexError(
        f'{len(GROUPS):d} groups but only {len(colors):d} colors defined; add more entries to `colors`.'
    )

# Assign the i-th color to the i-th group (read by comparative_plot via group['color']).
for index, group in enumerate(GROUPS):
    group['color'] = colors[index]

In [None]:
def comparative_plot(metric, ax):
    """Draw the test-set ``metric`` of every group in ``GROUPS`` onto ``ax``.

    For each group one error-bar curve is plotted over ``group[X_KEY]``. The
    points are the NaN-ignoring mean and the bars the NaN-ignoring standard
    deviation of ``group['scores']['test'][x][metric][:, 1:]``, i.e. the first
    column of each score array is left out. Axis labels (``X_KEY`` and
    ``metric``) and a legend are added afterwards.

    Args:
        metric: Key of the score arrays to plot (e.g. ``'AUROC'``).
        ax: Axes object to draw on.
    """
    curve_style = dict(linestyle='--', fmt='o', linewidth=2, capsize=6)

    for entry in GROUPS:
        # x-values of this curve:
        positions = entry[X_KEY]

        # Mean and spread per x-value:
        means, deviations = [], []
        for position in positions:
            values = entry['scores']['test'][position][metric][:, 1:]
            means.append(np.nanmean(values))
            deviations.append(np.nanstd(values))

        ax.errorbar(positions, means, yerr=deviations, color=entry['color'], label=entry['name'], **curve_style)

    ax.set_xlabel(X_KEY)
    ax.set_ylabel(metric)
    ax.legend()

## Compare AUROC

In [None]:
# Single-panel figure: AUROC of all groups over X_KEY.
fig, ax = plt.subplots(figsize=(8, 3))
comparative_plot('AUROC', ax)

fig.tight_layout()
plt.show()

## Plot AUPRC

In [None]:
# Single-panel figure: AUPRC of all groups over X_KEY.
fig, ax = plt.subplots(figsize=(8, 3))
comparative_plot('AUPRC', ax)

fig.tight_layout()
plt.show()

## Plot F1

In [None]:
# Single-panel figure: F1 of all groups over X_KEY.
fig, ax = plt.subplots(figsize=(8, 3))
comparative_plot('F1', ax)

fig.tight_layout()
plt.show()

## Plot precision and recall

In [None]:

# Two panels side by side: precision on the left, recall on the right.
fig, (ax_precision, ax_recall) = plt.subplots(1, 2, figsize=(8, 3))
comparative_plot('precision', ax_precision)
comparative_plot('recall', ax_recall)

fig.tight_layout()
plt.show()