In [None]:
import matplotlib
# `init_done` is only assigned at the end of this cell, so the PGF/LaTeX
# backend below is skipped the first time the cell runs and is switched on
# when the cell is executed again in the same kernel.
if 'init_done' in globals():
    matplotlib.use("pgf")
    matplotlib.rcParams.update({
        "pgf.texsystem": "pdflatex",
        'font.family': 'serif',
        'text.usetex': True,
        'pgf.rcfonts': False,
    })
import matplotlib.pyplot as plt

import numpy as np
import pickle
import sklearn.metrics
import pandas as pd

# Marks this cell as executed; read by the backend switch above on a re-run.
init_done = True

In [None]:
# Notebook-wide configuration.
RANDOM_STATE = 42  # not referenced by the cells shown in this notebook section
WINDOW_LENGTH = 24  # appears in the result path as "<WINDOW_LENGTH>h" (presumably hours -- TODO confirm)
MIN_LOS_ICU = 24  # appears in the result path as "min<MIN_LOS_ICU>h" (presumably hours -- TODO confirm)
N_CLIENTS = [2,4,8]  # client counts for which FL and local results are loaded
MAX_ROUNDS = 100  # add_best_weights() only uses runs with fewer recorded steps than this
PATIENCE = 30  # add_best_weights() subtracts this from the number of recorded steps
FOLDER_SUFFIXES = {'':'loss'}  # result-folder suffix -> name appended to the 'best ...' legend entry

# Load result files 

In [None]:
def _empty_results():
    """Return a fresh ``{suffix: {'cml': {}, 'fl': {}, 'lml': {}}}`` mapping."""
    return {
        suffix: {model: {} for model in ('cml', 'fl', 'lml')}
        for suffix in FOLDER_SUFFIXES
    }


# One independent container per result kind; load() fills them per client count.
scores_train = _empty_results()
scores_valid = _empty_results()
scores_test = _empty_results()
predictions = _empty_results()

In [None]:
def load(n_clients, fl=False):
    """Load the pickled result file for ``n_clients`` into the global score containers.

    For every folder suffix in ``FOLDER_SUFFIXES`` the matching pickle file is
    read and its content is stored in ``scores_train``, ``scores_valid`` and
    ``scores_test`` (and in ``predictions`` when the file contains a fourth
    element).

    Parameters
    ----------
    n_clients : int
        Number of clients of the experiment. ``1`` is stored under the
        ``'cml'`` key.
    fl : bool, optional
        If True, the ``scores_fl_`` file is read and stored under ``'fl'``;
        otherwise (and for ``n_clients > 1``) the result is stored under
        ``'lml'``.

    Raises
    ------
    FileNotFoundError
        If the result file does not exist.
    ValueError
        If the file holds neither three nor four elements.
    """
    key = 'cml' if n_clients == 1 else 'fl' if fl else 'lml'

    for suffix in FOLDER_SUFFIXES:
        path =  f'../scores/min{MIN_LOS_ICU:d}h/{WINDOW_LENGTH:d}h{suffix:s}/'
        path += ('scores_fl_' if fl else 'scores_')
        path += f'{n_clients:d}clients_{WINDOW_LENGTH:d}h(min{MIN_LOS_ICU:d}h).pickle'

        print(f'Loading file "{path:s}"', end='...')

        # NOTE: unpickling can execute arbitrary code -- only load result
        # files that were produced by a trusted run of this project.
        with open(path, 'rb') as file:
            content = tuple(pickle.load(file))

        # Files hold (train, valid, test) and optionally the raw predictions
        # as a fourth element. Reading once and branching on the length
        # avoids a bare `except` that would hide the real error and re-read
        # the file.
        if len(content) == 4:
            train, valid, test, preds = content
            predictions[suffix][key][n_clients] = preds
        else:
            train, valid, test = content

        scores_train[suffix][key][n_clients] = train
        scores_valid[suffix][key][n_clients] = valid
        scores_test[suffix][key][n_clients] = test

        print(f'Done.')

## Load central scores

In [None]:
# n_clients=1 is stored under the 'cml' key by load().
load(n_clients=1)

## Load FL-scores

In [None]:
# fl=True makes load() read the 'scores_fl_' files and store them under the 'fl' key.
for n in N_CLIENTS:
    load(n_clients=n, fl=True)

## Load local scores

In [None]:
# With fl left at False, load() stores results for n > 1 under the 'lml' key.
for n in N_CLIENTS:
    load(n_clients=n)

## Calculate scores with scikit-learn:

In [None]:
import assets
from helpers import enumerate_predictions
n_labels = 2  # number of label columns scored per prediction
n_fold = 5  # number of folds iterated over when scoring

In [None]:
# Recompute the test metrics from the stored predictions with scikit-learn.
for suffix in predictions:
    for model in predictions[suffix]:
        for n_clients in predictions[suffix][model]:
            test_scores = scores_test[suffix][model][n_clients]
            stored_predictions = predictions[suffix][model][n_clients]

            # Start every metric from a zeroed (fold, label) array:
            for metric_name in ('AUROC', 'AUPRC', 'precision', 'recall'):
                test_scores[metric_name] = np.zeros((n_fold, n_labels))

            # 'fl' entries are scored once, all other models once per client:
            n = 1 if model=='fl' else n_clients
            # Weight of a single score, so that the sum over clients is their mean:
            f = 1. / float(n)

            for fold in range(n_fold):
                for i in range(n):
                    pairs = list(enumerate_predictions(stored_predictions, n_labels=n_labels, client=i, fold=fold))
                    y_true = np.array([t.astype(int) for t, _ in pairs])
                    y_pred = np.array([p.astype(float) for _, p in pairs])

                    # Accumulate the classification metrics label by label:
                    for label in range(n_labels):
                        truth = y_true[:, label]
                        proba = y_pred[:, label]
                        decided = np.round(proba)  # hard decision used by precision / recall

                        prc_crv, rcl_crv, _ = sklearn.metrics.precision_recall_curve(truth, proba)

                        test_scores['AUROC'][fold, label]     += f * sklearn.metrics.roc_auc_score(truth, proba)
                        test_scores['AUPRC'][fold, label]     += f * sklearn.metrics.auc(rcl_crv, prc_crv)
                        test_scores['precision'][fold, label] += f * sklearn.metrics.precision_score(truth, decided)
                        test_scores['recall'][fold, label]    += f * sklearn.metrics.recall_score(truth, decided)

## Calculate F1-score:

In [None]:
def add_f1(scores):
    """Add the F1-score to ``scores`` in place.

    Reads ``scores['precision']`` and ``scores['recall']`` and stores their
    harmonic mean under ``scores['F1']``.

    Where precision and recall are both zero the F1-score is set to 0 instead
    of the NaN produced by 0/0. This matters because add_curve() and
    add_best_weights() interpret NaN as "no value recorded". Entries that are
    NaN in the inputs stay NaN.

    Parameters
    ----------
    scores : dict
        Mapping with array-like ``'precision'`` and ``'recall'`` entries of
        equal (or broadcastable) shape.

    Returns
    -------
    None
    """
    precision = np.asarray(scores['precision'], dtype=float)
    recall = np.asarray(scores['recall'], dtype=float)
    denominator = precision + recall

    # The 0/0 case is patched below, so silence the warning it would raise.
    with np.errstate(divide='ignore', invalid='ignore'):
        f1 = 2 * precision * recall / denominator

    # NaN == 0 is False, so NaN inputs are left untouched here.
    scores['F1'] = np.where(denominator == 0, 0.0, f1)

In [None]:
# Add the F1-score to every training score set.
for suffix, models in scores_train.items():
    for model, per_client in models.items():
        for n_clients, client_scores in per_client.items():
            add_f1(client_scores)

In [None]:
# Add the F1-score to every validation score set.
for suffix, models in scores_valid.items():
    for model, per_client in models.items():
        for n_clients, client_scores in per_client.items():
            add_f1(client_scores)

In [None]:
# Add the F1-score to every test score set.
for suffix, models in scores_test.items():
    for model, per_client in models.items():
        for n_clients, client_scores in per_client.items():
            add_f1(client_scores)

# Analyze results

### Create latex table:

In [None]:
# Test metrics reported in the LaTeX table, one table row per entry.
metrics = ['AUROC', 'AUPRC', 'F1', 'precision', 'recall']

In [None]:
# Print one LaTeX table body per folder suffix: a row per metric with
# "mean \pm std" cells for the central, FL and local models.
for suffix in FOLDER_SUFFIXES:
    print('============================================================')
    print(f'Suffix: "{suffix:s}"')
    print('------------------------------------------------------------\n')

    test_scores = scores_test[suffix]
    rows = []
    for m in metrics:
        # Row title followed by the central model cell
        # (`[:,1:]` leaves out the first label column):
        central = test_scores['cml'][1][m][:,1:]
        cells = [
            f'\\head\u007B{m:s}\u007D\t',
            f'& ${np.nanmean(central):.2f} \\pm {np.nanstd(central):.2f}$\t',
        ]

        # One cell per client count, first for FL and then for the local models:
        for model in ['fl', 'lml']:
            for n_clients in N_CLIENTS:
                values = test_scores[model][n_clients][m][:,1:]
                cells.append(f'& ${np.nanmean(values):.2f} \\pm {np.nanstd(values):.2f}$\t')

        rows.append(''.join(cells) + '\\\\\n')

    table = ''.join(rows)
    print(table)
print('============================================================')

### Create plots:

In [None]:
def add_curve(scores,  color, label, ax, client=None, round=None, step=1, bounds=None):
    """Plot the mean curve of ``scores`` on ``ax`` and return its length.

    ``scores`` is indexed as ``scores[round, client, step]``. The curves
    selected by ``client`` / ``round`` are forward-filled (every NaN is
    replaced by the last value recorded before it) and averaged.

    Parameters
    ----------
    scores : numpy.ndarray
        Score array whose last axis holds the recorded values; NaN marks
        "no value recorded".
    color, label
        Passed on to the matplotlib plotting calls.
    ax
        Axes-like object providing ``plot`` and ``fill_between``.
    client, round : int, optional
        Restrict the plot to one client and/or one round. ``None`` selects all.
    step : int, optional
        Distance between two consecutive x-values.
    bounds : {'mean_std', 'min_max', None}, optional
        ``'min_max'`` shades the range between the smallest and largest curve.
        ``'mean_std'`` shades the range between the mean of all values below
        the mean curve and the mean of all values above it. Bands are only
        drawn when more than one curve is selected.

    Returns
    -------
    int
        Number of plotted points, i.e. the length of the longest curve.
    """
    n_steps = scores.shape[-1]

    # Select the requested curves as a 2-D (curve, step) array. The copy keeps
    # the forward-fill below from modifying the caller's array.
    if client is not None:
        if round is not None:
            y = scores[round, client].reshape((1, n_steps)).copy()
        else:
            y = scores[:, client].copy()
    else:
        if round is not None:
            y = scores[round, :].copy()
        else:
            y = scores.reshape((int(np.prod(scores.shape[:-1])), n_steps)).copy()

    # Find the number of recorded values per curve and fill gaps with the last
    # value. `counts` starts at zero so that a curve without any recorded
    # value contributes length 0 instead of uninitialised memory.
    counts = np.zeros(y.shape[0], dtype='int')
    for i in range(y.shape[0]):
        last_value = np.nan

        for j in range(y.shape[1]):
            if np.isnan(y[i, j]):
                y[i, j] = last_value
            else:
                last_value = y[i, j]
                counts[i] = j + 1

    # Calculate x-values (exactly n of them):
    n = counts.max(initial=0)
    x = np.arange(1, (n * step) + 1, step)

    # Plot the mean curve:
    y_avg = np.mean(y, axis=0)
    ax.plot(x, y_avg[:n], color=color, label=label)

    if len(counts) > 1:
        if bounds == 'mean_std':
            # Band between the mean of the values below and above the mean curve:
            ax.fill_between(
                x,
                np.nanmean(np.where(y < y_avg, y, np.nan), axis=0)[:n],
                np.nanmean(np.where(y > y_avg, y, np.nan), axis=0)[:n],
                color=color,
                alpha=.25
            )

        elif bounds == 'min_max':
            # Band between the smallest and the largest curve:
            y_min = np.min(y, axis=0)[:n]
            y_max = np.max(y, axis=0)[:n]
            ax.fill_between(x, y_min, y_max, color=color, alpha=.25)

    return n

In [None]:
def add_best_weights(scores, color, label, ax, client=None, round=None, bounds=None, max_rounds=None, patience=None):
    """Mark on ``ax`` the step at which the best weights were found.

    For every selected ``scores[round, client]`` curve the number of recorded
    (non-NaN) steps is determined. Curves that recorded at least one value but
    fewer than ``max_rounds`` are treated as stopped early, and their marker
    is placed at ``recorded steps - patience``.

    Parameters
    ----------
    scores : numpy.ndarray
        Array indexed as ``scores[round, client, step]``; NaN marks
        "no value recorded".
    color, label
        Passed on to the matplotlib calls.
    ax
        Axes-like object providing ``axvline``, ``fill_betweenx``,
        ``get_ylim`` and ``set_ylim``.
    client, round : int, optional
        Restrict the evaluation to one client and/or one round. ``None``
        selects all. Only the selected curves are evaluated.
    bounds : {'mean_std', None or any other value}, optional
        ``'mean_std'`` draws the mean position and shades between the mean of
        the positions below and above it. Any other value draws the smallest
        and largest position and shades the range between them.
    max_rounds : int, optional
        Defaults to the module-level ``MAX_ROUNDS``.
    patience : int, optional
        Defaults to the module-level ``PATIENCE``.

    Returns
    -------
    int
        Largest number of recorded steps among the selected curves
        (0 if nothing is selected).
    """
    if max_rounds is None:
        max_rounds = MAX_ROUNDS
    if patience is None:
        patience = PATIENCE

    # Evaluate only the requested curves. Counting over the full array would
    # mix unselected entries into both the markers and the return value.
    rounds = [round] if round is not None else list(range(scores.shape[0]))
    clients = [client] if client is not None else list(range(scores.shape[1]))
    selected = scores[np.ix_(rounds, clients)]

    # Number of recorded steps = 1-based position of the last non-NaN value.
    positions = np.arange(1, selected.shape[2] + 1)
    counts = np.where(np.isnan(selected), 0, positions).max(axis=2, initial=0)

    # Curves without any value have no best weights; curves that reached
    # max_rounds were not stopped early.
    x = counts[(counts > 0) & (counts < max_rounds)] - patience
    y_lims = (0, ax.get_ylim()[1])

    if x.size > 0:
        if bounds == 'mean_std':
            # Best weights with mean and spread around the mean:
            x_avg = np.mean(x)
            ax.axvline(x=x_avg, color=color, linestyle='--', label=label)

            below = x[x < x_avg]
            above = x[x > x_avg]
            if below.size > 0 and above.size > 0:
                ax.fill_betweenx(y_lims, np.mean(below), np.mean(above), color=color, alpha=.25)

        else:
            # Best weights with min and max:
            x_min = np.min(x)
            ax.axvline(x=x_min, color=color, linestyle='--', label=label)

            if len(x) > 1:
                x_max = np.max(x)
                ax.axvline(x=x_max, color=color, linestyle='--')
                ax.fill_betweenx(y_lims, x_min, x_max, color=color, alpha=.25)

    ax.set_ylim(y_lims)

    return counts.max(initial=0)


## Overview plot:

In [None]:
def exploratory_plot(metric, n_clients=1, fl=False, suffix=''):
    """Show a grid of training/validation curves, one subplot per CV-iteration and client.

    Columns are the cross-validation iterations, rows are the clients. Each
    subplot shows the training and validation curve of ``metric`` plus the
    position of the best weights.

    Parameters
    ----------
    metric : str
        Key of the score to plot, e.g. ``'loss'``.
    n_clients : int, optional
        Number of clients; ``1`` selects the central model (``'cml'``).
    fl : bool, optional
        For ``n_clients > 1``: plot the federated (``'fl'``) instead of the
        local (``'lml'``) model.
    suffix : str, optional
        Folder suffix whose results are plotted.
    """
    # Number of cross-validation iterations shown (one subplot column each).
    n_rounds = 5

    # Choose model (identical for every subplot):
    if n_clients == 1:
        model = 'cml'
    else:
        model = 'fl' if fl else 'lml'

    train_scores = scores_train[suffix][model][n_clients][metric]
    valid_scores = scores_valid[suffix][model][n_clients][metric]

    fig = plt.figure(figsize=(2*n_rounds, 1.5*n_clients))

    for i in range(n_rounds):
        for j in range(n_clients):
            ax = fig.add_subplot(n_clients, n_rounds, j*n_rounds + i + 1)

            # Print curves:
            add_curve(train_scores, '#3465a4', 'train', ax, client=j, round=i, bounds=None)
            add_curve(valid_scores, '#f37500', 'valid', ax, client=j, round=i, bounds=None)

            # Print best weights.
            # NOTE(review): `round` is not passed, so every subplot of a client
            # shows the positions of all CV-iterations -- confirm this is intended.
            n = add_best_weights(train_scores, '#f10d0c', 'best', ax, client=j, bounds='mean_std')

            ax.set_xticks(np.arange(n+1, step=10))

            if i == 0:
                ax.set_ylabel(metric)

            if j == 0:
                ax.set_title(f'CV-iteration {i+1:d}')

            # Independent `if`: with a single row the top row is also the
            # bottom row and needs both the title and the x-label.
            if j == n_clients-1:
                ax.set_xlabel('fl-round' if fl else 'epoch')

            if i == n_rounds-1 and j == n_clients-1:
                ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# n_clients=1 makes exploratory_plot() show the central ('cml') model.
exploratory_plot('loss', n_clients=1, fl=False)

## Learning curve

In [None]:
def learning_plot(metric, ax, n_clients=1, client=None, round=None, fl=False, step=1, legend=True, suffixes=FOLDER_SUFFIXES):
    """Draw the training and validation curve of ``metric`` on ``ax``.

    The scores of all ``suffixes`` are concatenated and plotted as mean curves
    with a ``'mean_std'`` band. For every suffix the position of the best
    weights is added with a ``'min_max'`` band.

    ``n_clients == 1`` selects the central model; otherwise ``fl`` chooses
    between the federated and the local model. ``client`` and ``round`` are
    passed on to add_curve() / add_best_weights(), ``step`` sets the x-tick
    spacing and ``legend`` toggles the legend.
    """
    # Colours for the best-weights markers, consumed from the end of the list:
    marker_colors = [
        '#069a2E', #(  6, 154,  46)
        '#780373', #(120,   3, 115)
        '#f10d0c'  #(241,  13,  12)
    ]

    # Choose model:
    if n_clients == 1:
        model = 'cml'
    elif fl:
        model = 'fl'
    else:
        model = 'lml'

    # Print curves (training first, then validation):
    train_scores = np.concatenate([scores_train[suffix][model][n_clients][metric] for suffix in suffixes])
    n_train = add_curve(train_scores, '#3465a4', 'training', ax, client=client, round=round, bounds='mean_std')

    valid_scores = np.concatenate([scores_valid[suffix][model][n_clients][metric] for suffix in suffixes])
    n_valid = add_curve(valid_scores, '#f37500', 'validation', ax, client=client, round=round, bounds='mean_std')

    n = max(n_train, n_valid)

    # Print best weights:
    for suffix in suffixes:
        n_best = add_best_weights(
            scores_train[suffix][model][n_clients][metric],
            marker_colors.pop(),
            'best '+suffixes[suffix],
            ax,
            client=client,
            bounds='min_max',
        )
        n = max(n, n_best)

    ax.set_xticks(np.arange(n+1, step=step))

    ax.set_title(metric)
    ax.set_xlabel('FL-round' if fl else 'epoch')
    if legend:
        ax.legend()

In [None]:
# Selection shared by all four panels.
# NOTE: the name `round` hides the builtin of the same name in this notebook.
client = None
round = None
fl = False
n_clients=1

fig = plt.figure(figsize=(8, 4))

# (metric, subplot position in the 2x2 grid, show legend?)
panels = [
    ('loss',      1, False),
    ('F1',        3, False),
    ('precision', 2, False),
    ('recall',    4, True),
]
for panel_metric, position, show_legend in panels:
    learning_plot(
        panel_metric,
        fig.add_subplot(2, 2, position),
        step=5,
        client=client,
        round=round,
        fl=fl,
        n_clients=n_clients,
        legend=show_legend,
    )

fig.tight_layout()
plt.show()

#fig.savefig('./pictures/learning_curve.pdf')

In [None]:
# NOTE: the name `round` hides the builtin of the same name in this notebook.
client = None
round = None

fig = plt.figure(figsize=(8, 4))

# (federated model?, number of clients, panel title, show legend?)
panels = [
    (False, 1,             'loss CML',                           False),
    (True,  N_CLIENTS[-1], f'loss FL {N_CLIENTS[-1]:d} clients', True),
]
for position, (use_fl, clients, title, show_legend) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, position)
    learning_plot('loss', ax, step=10, client=client, round=round, fl=use_fl, n_clients=clients, legend=show_legend)
    ax.set_title(title)
    # Optional fixed y-range:
    #ax.set_ylim(.0,.8)
    #ax.set_yticks([.0,.2,.4,.6,.8])

fig.tight_layout()
plt.show()

#fig.savefig('./pictures/learning_curve_cont.pdf')

## Scores / Errors

In [None]:
# Curve selection read by plot():
plot_cml = True  # include the central model curves
plot_fl_clients = [N_CLIENTS[-1]]  # client counts whose FL curves are drawn
plot_lml_clients = [N_CLIENTS[-1]]  # client counts whose local-model curves are drawn

plot_train = False  # draw the curves taken from scores_train
plot_valid = True  # draw the curves taken from scores_valid

In [None]:
def plot(metric, ax, client=None, round=None, step=1, legend=True, y_ticks=[], suffixes=FOLDER_SUFFIXES):
    """Draw the ``metric`` curves of the selected models on ``ax``.

    Which curves appear is controlled by the module-level switches
    ``plot_cml``, ``plot_fl_clients``, ``plot_lml_clients``, ``plot_train``
    and ``plot_valid``. The scores of all ``suffixes`` are concatenated before
    plotting. ``client`` and ``round`` are passed on to add_curve() (the
    central model ignores ``client``), ``step`` sets the x-tick spacing,
    ``y_ticks`` optionally fixes the y-ticks and ``legend`` toggles the legend.
    """
    # Colours are consumed from the end of this list, one per curve:
    palette = [
        '#f37500', #(243, 117,   0)
        '#069a2E', #(  6, 154,  46)
        '#3465a4', #( 52, 101, 164)
        '#780373', #(120,   3, 115)
        '#f10d0c'  #(241,  13,  12)
    ]

    # Data splits to draw, in drawing order:
    splits = []
    if plot_train:
        splits.append((scores_train, 'train.'))
    if plot_valid:
        splits.append((scores_valid, 'valid.'))

    def draw(model, n_clients, tag, **selection):
        """Draw the chosen splits of one model; return the longest curve length."""
        longest = 0
        for store, split in splits:
            stacked = np.concatenate([store[suffix][model][n_clients][metric] for suffix in suffixes])
            curve_label = f'{model.upper():s} {split:s}{tag:s}'
            longest = max(longest, add_curve(stacked, palette.pop(), curve_label, ax, round=round, **selection))
        return longest

    # Plot curves: central model, then FL, then local models.
    n = 0
    if plot_cml:
        n = max(n, draw('cml', 1, ''))

    for n_clients in plot_fl_clients:
        n = max(n, draw('fl', n_clients, f' ({n_clients:d} cl.)', client=client, step=1))

    for n_clients in plot_lml_clients:
        n = max(n, draw('lml', n_clients, f' ({n_clients:d} cl.)', client=client))

    ax.set_xticks(np.arange(n+1, step=step))
    ax.set_xlabel('epoch')
    ax.set_title(metric)
    if len(y_ticks) > 0:
        ax.set_yticks(y_ticks)
    if legend:
        ax.legend()

In [None]:
# NOTE: the name `round` hides the builtin of the same name in this notebook.
client = None
round = None

fig = plt.figure(figsize=(8, 3))

# (metric, y-ticks, show legend?) -- one panel per entry, left to right
panels = [
    ('AUROC', [0.5, 0.6, 0.7, 0.8, 0.9, 1.0], True),
    ('AUPRC', [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], False),
    ('F1',    [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], False),
]
for position, (panel_metric, ticks, show_legend) in enumerate(panels, start=1):
    plot(panel_metric, fig.add_subplot(1, 3, position), step=10, y_ticks=ticks, client=client, round=round, legend=show_legend)

plt.tight_layout()
plt.show()

#fig.savefig('./pictures/score_curves.pdf')

## ROC plot

In [None]:
folds = 5


def _stack_test_predictions(model, n_clients):
    """Return the test predictions of all folds as one DataFrame without NaN rows.

    For every fold the ``[fold, 0, :, :]`` slice of the stored predictions is
    taken for each folder suffix and concatenated (presumably the second index
    selects the first client -- TODO confirm). All folds are then stacked with
    a single ``pd.concat``; ``DataFrame.append`` was removed in pandas 2.0.
    Later cells read column 0 as the labels and column 1 as the predictions.
    """
    frames = [
        pd.DataFrame(np.concatenate([
            predictions[suffix][model][n_clients][fold, 0, :, :]
            for suffix in FOLDER_SUFFIXES
        ]))
        for fold in range(folds)
    ]
    return pd.concat(frames).dropna()


y_test_cml = _stack_test_predictions('cml', 1)

y_test_fl = {n: _stack_test_predictions('fl', n) for n in N_CLIENTS}
y_test_lml = {n: _stack_test_predictions('lml', n) for n in N_CLIENTS}

In [None]:
# Inspect the stacked test predictions of the 4-client FL model.
y_test_fl[4]

In [None]:
# Colours of the ROC / PR curves; the trailing comments give the RGB values.
colors = [
    '#f37500', #(243, 117,   0)
    '#069a2E', #(  6, 154,  46)
    '#3465a4', #( 52, 101, 164)
    '#780373', #(120,   3, 115)
    '#f10d0c'  #(241,  13,  12)
]

### ROC curve

In [None]:
def plot_roc(model, n_clients, labels, predictions, ax, **kwargs):
    """Add the ROC curve of one model to ``ax``.

    ``labels`` are rounded to integers and, together with ``predictions``,
    turned into a ROC curve. The AUC shown in the legend is the mean of the
    stored test ``'AUROC'`` scores of ``model`` / ``n_clients`` over all
    folder suffixes, not the area under the curve drawn here. Extra keyword
    arguments are passed on to ``ax.plot``.
    """
    binary_labels = labels.round(decimals=0, out=None).astype(int)
    fpr, tpr, _ = sklearn.metrics.roc_curve(binary_labels, predictions)

    stored_scores = [scores_test[suffix][model][n_clients]['AUROC'][:,1:] for suffix in FOLDER_SUFFIXES]
    mean_auc = np.nanmean(np.concatenate(stored_scores))

    # Legend entry: model name, client count (if more than one) and AUC.
    legend_parts = [model.upper()]
    if n_clients > 1:
        legend_parts.append('%d cl.' % (n_clients))
    legend_parts.append('AUC=%.2f' % (mean_auc))

    ax.plot(fpr, tpr, label=' '.join(legend_parts),  **kwargs)

    ax.set_xlabel('False Positive Rate (FPR)')
    ax.set_xlim(-.05, 1.05)
    ax.set_ylabel('True Positive Rate (TPR)')
    ax.set_ylim(-.05, 1.05)
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.grid(True)
    ax.set_aspect('equal')

### PR curve

In [None]:
def plot_prc(model, n_clients, labels, predictions, ax, **kwargs):
    """Add the precision-recall curve of one model to ``ax``.

    ``labels`` are rounded to integers and, together with ``predictions``,
    turned into a precision-recall curve. The AUC shown in the legend is the
    mean of the stored test ``'AUPRC'`` scores of ``model`` / ``n_clients``
    over all folder suffixes, not the area under the curve drawn here. Extra
    keyword arguments are passed on to ``ax.plot``.
    """
    binary_labels = labels.round(decimals=0, out=None).astype(int)
    precision, recall, _ = sklearn.metrics.precision_recall_curve(binary_labels, predictions)

    stored_scores = [scores_test[suffix][model][n_clients]['AUPRC'][:,1:] for suffix in FOLDER_SUFFIXES]
    mean_auc = np.nanmean(np.concatenate(stored_scores))

    # Legend entry: model name, client count (if more than one) and AUC.
    legend_parts = [model.upper()]
    if n_clients > 1:
        legend_parts.append('%d cl.' % (n_clients))
    legend_parts.append('AUC=%.2f' % (mean_auc))

    ax.plot(recall, precision, label=' '.join(legend_parts), linewidth=2, **kwargs)

    ax.set_title('Precision-Recall (PR) Curve')
    ax.set_xlabel('Recall')
    ax.set_xlim(-.05, 1.05)
    ax.set_ylabel('Precision')
    ax.set_ylim(-.05, 1.05)
    ax.grid(True)
    ax.set_aspect('equal')

In [None]:
fig = plt.figure(figsize=(8, 4))

# Share of positive labels in the stacked central-model test data; this is
# the height of the PR baseline.
no_skill = len(y_test_cml[0][y_test_cml[0]==1]) / len(y_test_cml[0])

# (curve function, y-values of the dashed baseline, legend position)
panels = [
    (plot_roc, [0, 1],               'lower right'),
    (plot_prc, [no_skill, no_skill], 'upper right'),
]

for position, (draw_curve, baseline, legend_loc) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, position)
    i_color = 0

    # Central model:
    draw_curve('cml', 1, y_test_cml[0], y_test_cml[1], ax, color=colors[i_color])
    i_color += 1

    # FL and local model for the largest client count:
    if len(N_CLIENTS) > 0:
        n = N_CLIENTS[-1]
        draw_curve('fl', n, y_test_fl[n][0], y_test_fl[n][1], ax, color=colors[i_color])
        draw_curve('lml', n, y_test_lml[n][0], y_test_lml[n][1], ax, color=colors[i_color + 1])
        i_color += 2

    ax.plot([0, 1], baseline, linestyle='--', label='baseline')
    ax.legend(loc=legend_loc)

# show plot:
fig.tight_layout()
plt.show()

#fig.savefig('./pictures/auc_curves.pdf')