In [None]:
import matplotlib
# Switch matplotlib to the PGF backend (rendered through pdflatex) only when
# `init_done` already exists, i.e. when this cell is executed again in the same
# kernel: the flag is set at the end of this cell, so the first run on a fresh
# kernel skips this branch and keeps the default backend.
# NOTE(review): presumably the first run is meant for interactive viewing and the
# re-run for exporting LaTeX-ready figures — confirm.
if 'init_done' in globals():
    matplotlib.use("pgf")
    matplotlib.rcParams.update({
        "pgf.texsystem": "pdflatex",
        'font.family': 'serif',
        'text.usetex': True,
        'pgf.rcfonts': False,
    })
import matplotlib.pyplot as plt

import numpy as np
import pickle
import sklearn.metrics
import pandas as pd

init_done = True

In [None]:
# Seed constant; it is not referenced by any of the cells visible in this notebook.
RANDOM_STATE = 42
# Both lengths are written with an 'h' suffix into the result folder and file
# names built by load() (presumably hours — confirm).
WINDOW_LENGTH = 16
MIN_WINDOW_LENGTH = 48
# Optional text appended to the '<WINDOW_LENGTH>h' folder name of the result path.
FOLDER_SUFFIX = ''

# Load result files 

In [None]:
# One container per kind of result. The first key is the model type
# ('cml', 'fl' or 'lml'); load() adds the number of clients as second key.
scores_train = {key: {} for key in ('cml', 'fl', 'lml')}
scores_valid = {key: {} for key in ('cml', 'fl', 'lml')}
scores_test = {key: {} for key in ('cml', 'fl', 'lml')}
predictions = {key: {} for key in ('cml', 'fl', 'lml')}

In [None]:
def load(n_clients, fl=False):
    """Read one pickled result file and store its contents in the global result dicts.

    The file name is built from ``MIN_WINDOW_LENGTH``, ``WINDOW_LENGTH``,
    ``FOLDER_SUFFIX``, ``n_clients`` and ``fl``. The contents are stored under
    ``[key][n_clients]`` in ``scores_train``, ``scores_valid``, ``scores_test``
    and, if the file contains them, ``predictions``. ``key`` is ``'cml'`` for a
    single client, ``'fl'`` if ``fl`` is true and ``'lml'`` otherwise.

    Args:
        n_clients: Number of clients of the run to load.
        fl: Read the ``scores_fl_`` file instead of the ``scores_`` file.

    Raises:
        OSError: If the result file cannot be opened.
        ValueError: If the pickle holds neither three nor four elements.
    """
    path =  f'./scores/min{MIN_WINDOW_LENGTH:d}h/{WINDOW_LENGTH:d}h{FOLDER_SUFFIX:s}/'
    path += ('scores_fl_' if fl else 'scores_')
    path += f'{n_clients:d}clients_{WINDOW_LENGTH:d}h({MIN_WINDOW_LENGTH:d}h).pickle'

    print(f'Loading file "{path:s}"', end='...')

    key = 'cml' if n_clients == 1 else 'fl' if fl else 'lml'

    # NOTE: pickle.load can execute arbitrary code; only open result files you trust.
    with open(path, 'rb') as file:
        content = pickle.load(file)

    # Result files come in two layouts: with and without the test predictions.
    # Decide by length instead of catching every exception and reading the file twice.
    if len(content) == 4:
        train, valid, test, preds = content
    elif len(content) == 3:
        train, valid, test = content
        preds = None
    else:
        raise ValueError(f'Unexpected content in "{path}": expected 3 or 4 elements, got {len(content)}')

    scores_train[key][n_clients] = train
    scores_valid[key][n_clients] = valid
    scores_test[key][n_clients] = test
    if preds is not None:
        predictions[key][n_clients] = preds

    print('Done.')

## Load central scores

In [None]:
# A single client is stored under the 'cml' key by load().
load(n_clients=1, fl=False)

## Load FL-scores

In [None]:
# Federated results for every client count in use.
# The 8-client run is disabled; add 8 to the tuple to load it as well.
for n_clients in (2, 4):
    load(n_clients=n_clients, fl=True)

## Load local scores

In [None]:
# Local (non-federated) results for every client count in use.
# The 8-client run is disabled; add 8 to the tuple to load it as well.
for n_clients in (2, 4):
    load(n_clients=n_clients)

## Calculate scores with scikit-learn:

In [None]:
# Project-local helper. NOTE(review): judging from its use in the next cell it
# yields (true, predicted) pairs for one client and fold — confirm in helpers.py.
from helpers import enumerate_predictions
# Number of label columns; second axis of the classification score arrays below.
n_labels = 2
# Number of folds; first axis of all test score arrays below.
n_fold = 5

In [None]:
for model, model_predictions in predictions.items():
    for n_clients, client_predictions in model_predictions.items():
        results = scores_test[model][n_clients]

        # Start every score from zero; the loops below accumulate into these arrays.
        for name in ('AUROC', 'AUPRC', 'precision', 'recall'):
            results[name] = np.zeros((n_fold, n_labels))
        for name in ('MAE', 'MSE'):
            results[name] = np.zeros(n_fold)

        # Number of prediction sets that are averaged: one for 'fl', one per client otherwise.
        n = 1 if model == 'fl' else n_clients
        # Weight of one prediction set, so that the accumulated sum is the mean over all sets.
        f = 1. / float(n)

        for fold in range(n_fold):
            for i in range(n):
                # Collect the (true, predicted) pairs of this client and fold in one pass.
                pairs = [
                    (t.astype(int), p.astype(float))
                    for t, p in enumerate_predictions(client_predictions, n_labels=n_labels, client=i, fold=fold)
                ]
                y_true = np.array([t for t, _ in pairs])
                y_pred = np.array([p for _, p in pairs])

                # Classification scores, one value per label column:
                for label in range(n_labels):
                    truth, score = y_true[:, label], y_pred[:, label]
                    prc_crv, rcl_crv, _ = sklearn.metrics.precision_recall_curve(truth, score)
                    # Precision and recall are computed on predictions rounded to whole numbers.
                    hard = np.round(score)

                    results['AUROC'][fold, label] += f * sklearn.metrics.roc_auc_score(truth, score)
                    results['AUPRC'][fold, label] += f * sklearn.metrics.auc(rcl_crv, prc_crv)
                    results['precision'][fold, label] += f * sklearn.metrics.precision_score(truth, hard)
                    results['recall'][fold, label] += f * sklearn.metrics.recall_score(truth, hard)

                # Error scores between the first two columns, ignoring rows that contain NaN:
                y = client_predictions[fold, i]
                y = y[~np.isnan(y).any(axis=1), :]
                results['MAE'][fold] += f * sklearn.metrics.mean_absolute_error(y[:, 0], y[:, 1])
                results['MSE'][fold] += f * sklearn.metrics.mean_squared_error(y[:, 0], y[:, 1])

## Calculate F1-score:

In [None]:
def add_f1(scores):
    """Add the F1 score, derived from precision and recall, to ``scores`` in place.

    F1 is the harmonic mean ``2 * precision * recall / (precision + recall)``.
    Where precision and recall are both zero the harmonic mean is undefined
    (0/0); the result is set to 0 there instead of NaN, so that NaN-aware
    averages do not silently skip those entries. NaN inputs stay NaN.

    Args:
        scores: Mapping with the array-like entries ``'precision'`` and
            ``'recall'`` of equal shape. The result is stored as ``scores['F1']``.
    """
    precision = np.asarray(scores['precision'], dtype=float)
    recall = np.asarray(scores['recall'], dtype=float)
    denominator = precision + recall

    # The 0/0 entries are replaced below, so the division warning carries no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        f1 = 2 * precision * recall / denominator

    # `denominator == 0` is False for NaN, so missing values are left untouched.
    scores['F1'] = np.where(denominator == 0, 0.0, f1)

In [None]:
# Add the F1 score to every loaded training result.
for per_model in scores_train.values():
    for scores in per_model.values():
        add_f1(scores)

In [None]:
# Add the F1 score to every loaded validation result.
for per_model in scores_valid.values():
    for scores in per_model.values():
        add_f1(scores)

In [None]:
# Add the F1 score to every test result.
for per_model in scores_test.values():
    for scores in per_model.values():
        add_f1(scores)

# Analyze results

### Create LaTeX table:

In [None]:
# Score names that form the rows of the result table, in output order.
# Classification scores: the table averages them over all but the first label column.
metrics = ['AUROC', 'AUPRC', 'F1', 'precision', 'recall']
# Error scores: the table averages them over all their values.
errors = ['MAE', 'MSE']

In [None]:
def _mean_std_cell(values):
    """Format the NaN-ignoring mean and standard deviation of ``values`` as one table cell."""
    avg = np.nanmean(values)
    std = np.nanstd(values)
    return f'& ${avg:.2f} \\pm {std:.2f}$\t'


# Column order of the table: central model first, then FL and local models per client count.
# (To include the 8-client runs use [2, 4, 8] as client counts.)
columns = [('cml', 1)] + [(model, n_clients) for model in ['fl', 'lml'] for n_clients in [2, 4]]

rows = []

# Classification scores: the first label column is left out of the statistics.
for m in metrics:
    cells = [_mean_std_cell(scores_test[model][n_clients][m][:,1:]) for model, n_clients in columns]
    rows.append(f'\\head\u007B{m:s}\u007D\t' + ''.join(cells) + '\\\\\n')

# Error scores: statistics over all values.
for e in errors:
    cells = [_mean_std_cell(scores_test[model][n_clients][e]) for model, n_clients in columns]
    rows.append(f'\\head\u007B{e:s}\u007D\t' + ''.join(cells) + '\\\\\n')

table = ''.join(rows)

print(table)

### Create plots:

In [None]:
def add_curve(scores, metric, color, label, ax, client=None, round=None, step=1, averaging='mean'):
    """Plot one score curve on ``ax`` and return the x-position of its last point.

    Args:
        scores: Mapping from score name to a numpy array.
        metric: Key of the array in ``scores`` to plot.
        color: Line colour passed to ``ax.plot``.
        label: Legend label passed to ``ax.plot``.
        ax: Object with a matplotlib-style ``plot`` method.
        client: Optional index into the second axis of the array.
        round: Optional index into the first axis of the array.
        step: Distance between two consecutive x-values.
        averaging: ``'min'``, ``'max'`` or anything else for the mean; selects
            how all leading axes are collapsed into a single curve.

    Returns:
        ``n * step`` where ``n`` is the number of plotted points; 0 if the
        curve is empty or consists of NaN only.
    """
    # Select the requested part of the score array.
    y = scores[metric]
    if client is not None:
        y = y[round, client] if round is not None else y[:, client]

    elif round is not None:
        y = y[round, :]

    # Collapse leading axes until a one-dimensional curve remains.
    y_min = y_avg = y_max = y
    while len(y_avg.shape) > 1:
        y_min = np.min(y_min, axis=0)
        y_avg = np.mean(y_avg, axis=0)
        y_max = np.max(y_max, axis=0)

    # Use at most int(50/step) points and drop trailing NaN entries of the mean curve.
    # NOTE(review): 50 is presumably the maximum number of epochs/rounds — confirm.
    n = min(y_avg.shape[0], int(50/step))
    # `n > 0` keeps the index from wrapping around to the end of the array
    # (and finally raising IndexError) when every value is NaN.
    while n > 0 and np.isnan(y_avg[n-1]):
        n -= 1

    # x-values start at 1 and are `step` apart.
    x = np.arange(1, (n*step)+1, step)

    if averaging == 'min':
        curve = y_min
    elif averaging == 'max':
        curve = y_max
    else:
        curve = y_avg
    ax.plot(x[:n], curve[:n], color=color, label=label)

    return n*step

## Overview plot:

In [None]:
def exploratory_plot(metric, n_clients=1, fl=False, averaging='mean'):
    """Draw a grid of training and validation curves for one score.

    The grid has one column per index of the first score axis (titled
    'CV-iteration') and one row per client.

    Args:
        metric: Name of the score to plot.
        n_clients: Number of clients; 1 selects the 'cml' results.
        fl: For more than one client, plot the 'fl' results instead of the 'lml' results.
        averaging: Passed on to ``add_curve``.
    """
    n_rounds = 5

    fig = plt.figure(figsize=(2*n_rounds, 1.5*n_clients))

    # Result group to plot; it is the same for every subplot.
    if n_clients == 1:
        model = 'cml'
    elif fl:
        model = 'fl'
    else:
        model = 'lml'

    for i in range(n_rounds):
        for j in range(n_clients):
            ax = fig.add_subplot(n_clients, n_rounds, j*n_rounds + i + 1)

            # For the central model no client index is passed to add_curve.
            client = None if n_clients == 1 else j

            # Print curves:
            n = 0
            n = max(n, add_curve(scores_train[model][n_clients], metric, '#3465a4', 'train', ax, client=client, round=i, averaging=averaging))
            n = max(n, add_curve(scores_valid[model][n_clients], metric, '#f37500', 'valid', ax, client=client, round=i, averaging=averaging))

            # Print best weights:
            # NOTE(review): presumably 50 is the maximum number of epochs and 20 the
            # early-stopping patience, so the line marks the restored weights — confirm.
            if n < 50:
                # Draw on `ax` itself rather than on pyplot's current axes.
                ax.axvline(x=n-20, color='#f10d0c', linestyle='--', label='best')

            ax.set_xticks(np.arange(n+1, step=10))

            if i == 0:
                ax.set_ylabel(metric)

            if j == 0:
                ax.set_title(f'CV-iteration {i+1:d}')

            # Independent of the title check: with a single row the first row is
            # also the last one and needs both title and x-label.
            if j == n_clients-1:
                ax.set_xlabel('fl-round' if fl else 'epoch')

            if i == n_rounds-1 and j == n_clients-1:
                ax.legend()

    fig.tight_layout()
    fig.show()

In [None]:
# Loss curves of the federated run with four clients.
exploratory_plot(metric='loss', n_clients=4, fl=True)

## Learning curve

In [None]:
def learning_plot(metric, ax, n_clients=1, client=None, round=None, fl=False, step=1, legend=True, averaging='mean'):
    """Draw the training and validation curve of one score on ``ax``.

    Args:
        metric: Name of the score to plot; also used as subplot title.
        ax: Matplotlib axes to draw on.
        n_clients: Number of clients; 1 selects the 'cml' results.
        client: Optional client index, passed to ``add_curve`` (not used for 'cml').
        round: Optional index into the first score axis, passed to ``add_curve``.
        fl: For more than one client, plot the 'fl' results instead of the 'lml' results.
        step: Distance between two x-ticks.
        legend: Whether to add a legend to ``ax``.
        averaging: Passed on to ``add_curve``.
    """
    # Select the result group; for the central model no client index is passed on.
    if n_clients == 1:
        model, selected_client = 'cml', None
    elif fl:
        model, selected_client = 'fl', client
    else:
        model, selected_client = 'lml', client

    # Print curves:
    n = 0
    n = max(n, add_curve(scores_train[model][n_clients], metric, '#3465a4', 'training', ax, client=selected_client, round=round, averaging=averaging))
    n = max(n, add_curve(scores_valid[model][n_clients], metric, '#f37500', 'validation', ax, client=selected_client, round=round, averaging=averaging))

    # Print best weights:
    # NOTE(review): presumably 50 is the maximum number of epochs and 20 the
    # early-stopping patience, so the line marks the restored weights — confirm.
    if n < 50:
        # Draw on `ax` itself rather than on pyplot's current axes.
        ax.axvline(x=n-20, color='#f10d0c', linestyle='--', label='best weights')

    ax.set_xticks(np.arange(n+1, step=step))

    ax.set_title(metric)
    ax.set_xlabel('FL-round' if fl else 'epoch')
    if legend: ax.legend()

    # Use the figure that owns `ax` instead of relying on a global named `fig`.
    figure = ax.figure
    figure.tight_layout()
    figure.show()

In [None]:
# Selection shared by all three subplots. The index into the first score axis is
# named `cv_round` so that the builtin `round` stays usable in the rest of the session.
client = None
cv_round = 2
fl = True
n_clients = 4

fig = plt.figure(figsize=(8, 4))

# Top row: loss across the full width.
learning_plot('loss', fig.add_subplot(2, 1, 1), step=2, client=client, round=cv_round, fl=fl, n_clients=n_clients, legend=False)

# Bottom row: precision and recall side by side; only the last subplot gets a legend.
learning_plot('precision', fig.add_subplot(2, 2, 3), step=5, client=client, round=cv_round, fl=fl, n_clients=n_clients, legend=False)
learning_plot('recall', fig.add_subplot(2, 2, 4), step=5, client=client, round=cv_round, fl=fl, n_clients=n_clients)

fig.tight_layout()
plt.show()

#fig.savefig('./pictures/learning_curve.pdf')

In [None]:
# The index into the first score axis is named `cv_round` so that the builtin
# `round` stays usable in the rest of the session.
client = None
cv_round = 3

fig = plt.figure(figsize=(8, 4))

# Left: loss of the central model.
ax = fig.add_subplot(1, 2, 1)
learning_plot('loss', ax, step=10, client=client, round=cv_round, fl=False, n_clients=1, legend=False)
ax.set_title('loss CML')
#ax.set_ylim(.0,.8)
#ax.set_yticks([.0,.2,.4,.6,.8])

# Right: loss of the federated model with four clients.
ax = fig.add_subplot(1, 2, 2)
learning_plot('loss', ax, step=10, client=client, round=cv_round, fl=True, n_clients=4, legend=True)
ax.set_title('loss FL 4 clients')
#ax.set_ylim(.0,.8)
#ax.set_yticks([.0,.2,.4,.6,.8])

fig.tight_layout()
fig.show()

#fig.savefig('./pictures/learning_curve_cont.pdf')

## Scores / Errors

In [None]:
# Models drawn by plot(): the central model and, for FL and local models,
# one curve set per listed client count.
plot_cml = True
plot_fl_clients = [4]
plot_lml_clients = [4]

# Score sets drawn by plot() for every selected model.
plot_train = False
plot_valid = True

In [None]:
def plot(metric, ax, client=None, round=None, step=1, legend=True, y_ticks=()):
    """Draw one score for all models selected by the global plot flags on ``ax``.

    Which curves are drawn is controlled by ``plot_cml``, ``plot_fl_clients``,
    ``plot_lml_clients``, ``plot_train`` and ``plot_valid``.

    Args:
        metric: Name of the score to plot; also used as subplot title.
        ax: Matplotlib axes to draw on.
        client: Optional client index, passed to ``add_curve`` for FL and local models.
        round: Optional index into the first score axis, passed to ``add_curve``.
        step: Distance between two x-ticks.
        legend: Whether to add a legend to ``ax``.
        y_ticks: Optional y-tick positions; the ticks are left unchanged if empty.
    """
    # Colours in the order in which they are handed out to the curves.
    palette = [
        '#f10d0c', #(241,  13,  12)
        '#780373', #(120,   3, 115)
        '#3465a4', #( 52, 101, 164)
        '#069a2E', #(  6, 154,  46)
        '#f37500', #(243, 117,   0)
    ]

    # Collect the curves first: (scores, legend label, extra add_curve arguments).
    curves = []
    if plot_cml:
        if plot_train: curves.append((scores_train['cml'][1], 'CML train.', {}))
        if plot_valid: curves.append((scores_valid['cml'][1], 'CML valid.', {}))

    for n_clients in plot_fl_clients:
        if plot_train: curves.append((scores_train['fl'][n_clients], f'FL train. ({n_clients:d} cl.)', {'client': client, 'step': 1}))
        if plot_valid: curves.append((scores_valid['fl'][n_clients], f'FL valid. ({n_clients:d} cl.)', {'client': client, 'step': 1}))

    for n_clients in plot_lml_clients:
        if plot_train: curves.append((scores_train['lml'][n_clients], f'LML train. ({n_clients:d} cl.)', {'client': client}))
        if plot_valid: curves.append((scores_valid['lml'][n_clients], f'LML valid. ({n_clients:d} cl.)', {'client': client}))

    # Plot curves. The colours repeat from the start when more curves than
    # colours are selected instead of failing on an exhausted list.
    n = 0
    for index, (scores, label, extra) in enumerate(curves):
        color = palette[index % len(palette)]
        n = max(n, add_curve(scores, metric, color, label, ax, round=round, **extra))

    ax.set_xticks(np.arange(n+1, step=step))
    ax.set_xlabel('epoch')
    ax.set_title(metric)
    if len(y_ticks) > 0: ax.set_yticks(y_ticks)
    if legend: ax.legend()

In [None]:
# The index into the first score axis is named `cv_round` so that the builtin
# `round` stays usable in the rest of the session.
client = None
cv_round = 2

fig = plt.figure(figsize=(8, 3))

# Only the first subplot carries the legend.
plot('AUROC', fig.add_subplot(1, 3, 1), step=10, y_ticks=[0.5, 0.6, 0.7, 0.8, 0.9, 1.0], client=client, round=cv_round)
plot('AUPRC', fig.add_subplot(1, 3, 2), step=10, y_ticks=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0], client=client, round=cv_round, legend=False)
plot('F1',    fig.add_subplot(1, 3, 3), step=10, y_ticks=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0], client=client, round=cv_round, legend=False)

plt.tight_layout()
plt.show()

#fig.savefig('./pictures/score_curves.pdf')

In [None]:
# The index into the first score axis is named `cv_round` so that the builtin
# `round` stays usable in the rest of the session.
client = None
cv_round = 1

fig = plt.figure(figsize=(8, 3))

# Only the last subplot carries the legend.
plot('MAE', fig.add_subplot(1, 2, 1), step=10, y_ticks=[.0, .1, .2, .3, .4, .5], client=client, round=cv_round, legend=False)
plot('MSE', fig.add_subplot(1, 2, 2), step=10, y_ticks=[.0, .1, .2, .3, .4, .5], client=client, round=cv_round)

plt.tight_layout()
plt.show()

#fig.savefig('./pictures/error_curves.pdf')

## ROC plot

In [None]:
folds = 5


def _pooled_test_predictions(model, n_clients, n_folds=folds):
    """Stack the predictions of client index 0 over all folds and drop rows containing NaN.

    Uses ``pd.concat`` because ``DataFrame.append`` no longer exists in pandas 2.
    The row index of every fold is kept as it is, as ``append`` did.
    """
    per_fold = [
        pd.DataFrame(predictions[model][n_clients][fold, 0, :, :])
        for fold in range(n_folds)
    ]
    return pd.concat(per_fold).dropna()


# NOTE(review): only client index 0 is pooled, also for the local models with
# several clients — confirm that this is intended.
y_test_cml = _pooled_test_predictions('cml', 1)
y_test_fl2 = _pooled_test_predictions('fl', 2)
y_test_fl4 = _pooled_test_predictions('fl', 4)
#y_test_fl8 = _pooled_test_predictions('fl', 8)
y_test_lml2 = _pooled_test_predictions('lml', 2)
y_test_lml4 = _pooled_test_predictions('lml', 4)
#y_test_lml8 = _pooled_test_predictions('lml', 8)

In [None]:
# Show the pooled predictions of the 4-client FL run as the cell output.
y_test_fl4

In [None]:
# Line colours of the ROC / PR figure; the trailing comments give the RGB values.
colors = [
    '#f37500', #(243, 117,   0)
    '#069a2E', #(  6, 154,  46)
    '#3465a4', #( 52, 101, 164)
    '#780373', #(120,   3, 115)
    '#f10d0c'  #(241,  13,  12)
]

### ROC curve

In [None]:
def plot_roc(model, n_clients, labels, predictions, ax, **kwargs):
    """Add the ROC curve of one model to ``ax`` and set up the axes.

    Args:
        model: Model key in ``scores_test``; its upper-case form starts the legend entry.
        n_clients: Number of clients; mentioned in the legend entry if greater than 1.
        labels: True labels; rounded to whole numbers and cast to int.
        predictions: Predicted scores for ``labels``.
        ax: Matplotlib axes to draw on.
        **kwargs: Passed on to ``ax.plot``.
    """
    binary_labels = labels.round(decimals=0, out=None).astype(int)
    fp, tp, _ = sklearn.metrics.roc_curve(binary_labels, predictions)

    # The legend shows the mean of the stored AUROC test scores (first label
    # column left out), not the area under the curve drawn here.
    auc = np.nanmean(scores_test[model][n_clients]['AUROC'][:,1:])

    parts = [model.upper()]
    if n_clients > 1:
        parts.append('%d cl.' % (n_clients))
    parts.append('AUC=%.2f' % (auc))

    ax.plot(fp, tp, label=' '.join(parts), **kwargs)

    ax.set(
        xlabel='False Positive Rate (FPR)',
        xlim=(-.05, 1.05),
        ylabel='True Positive Rate (TPR)',
        ylim=(-.05, 1.05),
        title='Receiver Operating Characteristic (ROC) Curve',
    )
    ax.grid(True)
    ax.set_aspect('equal')

### PR curve

In [None]:
def plot_prc(model, n_clients, labels, predictions, ax, **kwargs):
    """Add the precision-recall curve of one model to ``ax`` and set up the axes.

    Args:
        model: Model key in ``scores_test``; its upper-case form starts the legend entry.
        n_clients: Number of clients; mentioned in the legend entry if greater than 1.
        labels: True labels; rounded to whole numbers and cast to int.
        predictions: Predicted scores for ``labels``.
        ax: Matplotlib axes to draw on.
        **kwargs: Passed on to ``ax.plot`` in addition to ``linewidth=2``.
    """
    binary_labels = labels.round(decimals=0, out=None).astype(int)
    precision, recall, _ = sklearn.metrics.precision_recall_curve(binary_labels, predictions)

    # The legend shows the mean of the stored AUPRC test scores (first label
    # column left out), not the area under the curve drawn here.
    auc = np.nanmean(scores_test[model][n_clients]['AUPRC'][:,1:])

    parts = [model.upper()]
    if n_clients > 1:
        parts.append('%d cl.' % (n_clients))
    parts.append('AUC=%.2f' % (auc))

    ax.plot(recall, precision, label=' '.join(parts), linewidth=2, **kwargs)

    ax.set(
        title='Precision-Recall (PR) Curve',
        xlabel='Recall',
        xlim=(-.05, 1.05),
        ylabel='Precision',
        ylim=(-.05, 1.05),
    )
    ax.grid(True)
    ax.set_aspect('equal')

In [None]:
fig = plt.figure(figsize=(8, 4))

# One entry per curve, in legend order: (model, number of clients, pooled
# predictions, colour). Column 0 of the predictions holds the labels, column 1
# the predicted scores. The 8-client runs are disabled; add ('fl', 8, ...) and
# ('lml', 8, ...) entries to include them.
curves = [
    ('cml', 1, y_test_cml,  colors[0]),
    ('fl',  2, y_test_fl2,  colors[1]),
    ('fl',  4, y_test_fl4,  colors[2]),
    ('lml', 2, y_test_lml2, colors[3]),
    ('lml', 4, y_test_lml4, colors[4]),
]

# Share of samples with label 1 in the pooled central predictions; it is the
# height of the baseline in the PR plot and is computed only once.
no_skill = len(y_test_cml[0][y_test_cml[0]==1]) / len(y_test_cml[0])

# plot AUROC:
ax = fig.add_subplot(1,2,1)

for model, n_clients, y_test, color in curves:
    plot_roc(model, n_clients, y_test[0], y_test[1], ax, color=color)

# The ROC baseline is the diagonal.
ax.plot([0, 1], [0, 1], linestyle='--', label='baseline')
ax.legend(loc='lower right')

# plot AUPRC:
ax = fig.add_subplot(1,2,2)

for model, n_clients, y_test, color in curves:
    plot_prc(model, n_clients, y_test[0], y_test[1], ax, color=color)

ax.plot([0, 1], [no_skill, no_skill], linestyle='--', label='baseline')
ax.legend(loc='upper right')

# save plot:
fig.tight_layout()
fig.show()

#fig.savefig('./pictures/auc_curves.pdf')