In [1]:
import pandas as pd
import numpy as np
import os, sys
import cv2
import matplotlib.pyplot as plt
import copy
import tensorflow as tf
from io import StringIO # Python3 use: from io import StringIO
import seaborn as sns

models = tf.keras.models  # like 'from tensorflow.keras import models' (PyCharm import issue workaround)
layers = tf.keras.layers  # like 'from tensorflow.keras import layers' (PyCharm import issue workaround)

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

DATA = os.path.join(module_path, 'Data', 'Augmented Data')
RESULTS = os.path.join(module_path, 'Results', 'Thesis')
FIGURES = os.path.join(module_path, 'Figures', 'Thesis')
from Scripts import Data_Loader_Functions as DL
from Scripts import Model_Architectures as mA
from Scripts import Results_Evaluation as rE

In [2]:
def create_simple_grid(file_paths, y_ticks, metric, top_exp):
    colors = ['#CD6155', '#2E86C1']
    styles = ['-','--']
    
    # Create Figure
    fig = plt.figure(figsize=(18, 16))
    fig.subplots_adjust(hspace=0.4, wspace=0.2)
    for i, file_path in enumerate(file_paths):
        
        # Add figure to subplot and read in data frame (session 0 is the 'pre-training session')
        
        df = pd.read_excel(file_path)
        df = df[df['Session'] != 0].reset_index()
        
        # Prepare dataframe for plotting
        metrics = [metric, 'val_' + metric]
        df_plot = df[metrics]
        df_plot = df_plot.rename(columns={metrics[0]: 'Train ' + metric, metrics[1]: 'Validation ' + metric})
        
        ax = fig.add_subplot(len(file_paths) // 2 + 1, 2, i + 1)
        df_plot.plot(ax=ax, style=styles, color=colors)
        
        # Add vertical lines indicating sessions
        for j, val in enumerate(df[df['Epoch'] == 0].index.values):
            ax.axvline(val, c='#C2C5CC', ls=':')
            plt.text(val + 1, 0.02, 'S: {}'.format(j+1))
            
        # Set title and legends
        ax.set_title(top_exp[file_path.split('Plotting/')[1]])
        ax.set_yticks(y_ticks)
        if i == 0:
            ax.set_xlabel('Centralized Epochs')
        else:
            ax.set_xlabel('Federated Communication Rounds')
    
    plt.tight_layout()
    return fig, ax

In [3]:
def create_avg_grid(file_paths_mean, file_paths_sd, y_ticks, metric, top_exp):
    colors = ['#CD6155', '#2E86C1']
    styles = ['-','--']
    
    # Create Figure
    fig = plt.figure(figsize=(18, 16))
    fig.subplots_adjust(hspace=0.4, wspace=0.2)
    for i, (file_path_1, file_path_2) in enumerate(zip(file_paths_mean, file_paths_sd)):
        
        # Add figure to subplot and read in data frame (session 0 is the 'pre-training session')
        df_mean = pd.read_excel(file_path_1).fillna(0)
        df_sd = pd.read_excel(file_path_2).fillna(0)
        df_mean = df_mean[df_mean['Session'] != 0].reset_index()
        df_sd = df_sd[df_sd['Session'] != 0].reset_index()
        
        # Prepare dataframe for plotting
        metrics = [metric, 'val_' + metric]
        df_plot_mean = df_mean[metrics]
        df_plot_sd = df_sd[metrics]
        df_plot_mean = df_plot_mean.rename(columns={metrics[0]: 'Train ' + metric, metrics[1]: 'Validation ' + metric})
        df_plot_sd = df_plot_sd.rename(columns={metrics[0]: 'Train ' + metric, metrics[1]: 'Validation ' + metric})
        ax = fig.add_subplot(len(file_paths) // 2 + 1, 2, i + 1)
        df_plot_mean.plot(ax=ax, style=styles, color=colors)
        plt.fill_between(df_plot_mean.index, (df_plot_mean - df_plot_sd)[df_plot_mean.columns[0]], (df_plot_mean + df_plot_sd)[df_plot_mean.columns[0]], color='#CD6155', alpha=0.2)
        plt.fill_between(df_plot_mean.index, (df_plot_mean - df_plot_sd)[df_plot_mean.columns[1]], (df_plot_mean + df_plot_sd)[df_plot_mean.columns[1]], color='#2E86C1', alpha=0.2)
        
        # Add vertical lines indicating sessions
        for j, val in enumerate(df_mean[df_mean['Epoch'] == 0].index.values):
            ax.axvline(val, c='#C2C5CC', ls=':')
            plt.text(val + 1, 0.02, 'S: {}'.format(j+1))
            
        # Set title and legends
        ax.set_title(top_exp[file_path_1.split('MEAN_')[1]])
        ax.set_yticks(y_ticks)
        if i == 0:
            ax.set_xlabel('Centralized Epochs')
        else:
            ax.set_xlabel('Federated Communication Rounds')
    
    plt.tight_layout()
    return fig, ax

In [4]:
def draw_pain_bar_chart(group_by):
    # Create Dataframe
    df = pd.DataFrame(DL.get_labels(DL.get_image_paths(os.path.join(DATA, 'group_2'))), columns=['Person', 'Session', 'Culture', 'Frame', 'Pain', 'Trans_1', 'Trans2'])
    df[['Person', 'Session', 'Culture', 'Frame', 'Pain', ]] = df[['Person', 'Session', 'Culture', 'Frame', 'Pain', ]].astype(int)
    
    # Get Frequencey values
    group = sorted(df[group_by].unique())
    share_1 = {}
    for elem in group:
        key, val = np.unique(df[df[group_by] == elem]['Pain'], return_counts=True)
        if 1 in key:
            share_1[elem] = val[1] / sum(val[1:])
        else:
            share_1[elem] = 0
    
    # Draw figure
    plt.figure(figsize=(10,5))
    if group_by == 'Session':
        bars = plt.bar(share_1.keys(), share_1.values(), color='#2E86C1')
        bars[5].set_color('#CD6155')
    else:
        bars = plt.bar(np.arange(0,len(share_1.keys()),1), share_1.values(), color='#2E86C1')
        bars[1].set_color('#CD6155')
    autolabel(bars)
    
    
    plt.xticks(np.arange(0,len(share_1.keys())+1,1), group)
    plt.yticks(np.arange(0,1.1,0.2), ['{}%'.format(int(num * 100)) for num in np.arange(0,1.1,0.2)])
    plt.xlabel(group_by) if group_by == 'Session' else plt.xlabel("Test Subjects")
    plt.ylabel("Share of Pain Level '1'")
    ax = plt.gca()
    ax.yaxis.grid(True, linestyle='--')
    fig = plt.gcf()
    return fig, ax

In [5]:
def create_avg_conf_matrix(model_type, session, subject):
    df_concat = pd.DataFrame()
    for folder in folders:
        f_path = os.path.join(RESULTS, folder)
        files = [os.path.join(f_path, file) for file in os.listdir(f_path) if '.csv' in file and model_type in file]
        df_concat = df_concat.add(pd.read_csv(files[0]), fill_value=0)
    df_concat = df_concat / len(folders)

    cols = [col for col in df_concat.columns if str(subject) in col]
    df_conf_matrix = pd.DataFrame()
    df_conf_matrix['0'] = [df_concat[cols].iloc[session - 1].loc['subject_{}_true_negatives'.format(subject)],
                           df_concat[cols].iloc[session - 1].loc['subject_{}_false_positives'.format(subject)]]
    df_conf_matrix['1'] = [df_concat[cols].iloc[session - 1].loc['subject_{}_false_negatives'.format(subject)],
                           df_concat[cols].iloc[session - 1].loc['subject_{}_true_positives'.format(subject)]]
    return df_conf_matrix

In [6]:
def autolabel(rects, xpos='center'):
    """
    Attach a text label above each bar in *rects*, displaying its height.

    *xpos* indicates which side to place the text w.r.t. the center of
    the bar. It can be one of the following {'center', 'right', 'left'}.
    """

    ha = {'center': 'center', 'right': 'left', 'left': 'right'}
    offset = {'center': 0, 'right': 1, 'left': -1}

    for rect in rects:
        height = rect.get_height()
        plt.annotate('{:.0%}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(offset[xpos]*3, 3),  # use 3 points offset
                    textcoords="offset points",  # in both directions
                    ha=ha[xpos], va='bottom')

In [7]:
def create_pain_df(path, pain_gap=()):
    img_paths = np.array(DL.get_image_paths(path))
    labels = np.array(DL.get_labels(img_paths))
    df = pd.DataFrame(labels, columns=['Person', 'Session', 'Culture', 'Frame', 'Pain', 'Trans_1', 'Trans_2'])
    df[['Person', 'Session', 'Culture', 'Frame', 'Pain']] = df[
        ['Person', 'Session', 'Culture', 'Frame', 'Pain']].astype(int)
    df['img_path'] = img_paths
    df[['Trans_1', 'Trans_2', 'img_path']] = df[['Trans_1', 'Trans_2', 'img_path']].astype(str)
    df = df.sort_values(['Person', 'Session', 'Frame', 'Trans_1', 'Trans_2'],
                        ascending=[True, True, True, False, False]).reset_index(drop=True)
    df['temp_id'] = df['Person'].astype(str) + df['Session'].astype(str) + df['Frame'].astype(str)
    df = df[~df['Pain'].isin(pain_gap)]
    return df

In [8]:
def model_summary_to_df(model, mystdout):
    model.summary(line_length=100)
    model_summary = mystdout.getvalue()
    a = model_summary.split('\n')
    a = a[4:-5]
    a = a[::2]
    for idx, elem in enumerate(a):
        a[idx] = [elem[:45], elem[45:85], elem[85:]]
    return pd.DataFrame(a, columns=['Layer Type', 'Output Shape', 'Param #'])

In [9]:
def format_df(df):
    df['Layer Type'] = df['Layer Type'].map(lambda x: x.split('(')[1].split(')')[0])
    df['Param #'] = df['Param #'].astype(float)
    df = df.append(df.sum(numeric_only=True), ignore_index=True)
    df.loc[len(df)-1, 'Layer Type'] = 'Total'
    df['Param #'] = df['Param #'].apply("{0:,.0f}".format)
    return df.fillna('')

In [10]:
def model_summary():
    model_1 = build_CNN((215, 215, 1))
    old_stdout = copy.copy(sys.stdout)
    sys.stdout = mystdout = StringIO()
    df = model_summary_to_df(model_1, mystdout)
    df = format_df(df)
    df = pd.concat((pd.DataFrame([{'Layer Type': 'Input', 'Output Shape': '(None, 215, 215, 1)',  'Param #': 0}]), df), ignore_index=True)
    return df, old_stdout

In [11]:
def build_CNN(input_shape):
    """
    Return a simple CNN model for image classification.

    :param input_shape:     image input shape (tuple), e.g. (28, 28, 1)

    :return:
        model               compiled tensorflow model
    """

    print("Setting up CNN")
    # Set up model type
    model = models.Sequential(name='CNN')

    # Add layers
    model.add(layers.Conv2D(filters=32, kernel_size=(5, 5), input_shape=input_shape, padding='same', strides=(2, 2),
                            name='conv2d_0_global'))
    model.add(layers.BatchNormalization(name='batch_norm_0_global'))
    model.add(layers.ReLU(name='relu_0_global'))
    # model.add(layers.MaxPooling2D(name='max_pool_0_global'))

    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), padding='same', strides=(2, 2), name='conv2d_1_global'))
    model.add(layers.BatchNormalization(name='batch_norm_1_global'))
    model.add(layers.ReLU(name='relu_1_global'))
    # model.add(layers.MaxPooling2D(name='max_pool_1_global'))

    model.add(layers.Conv2D(filters=128, kernel_size=(5, 5), padding='same', strides=(2, 2), name='conv2d_2_global'))
    model.add(layers.BatchNormalization(name='batch_norm_2_global'))
    model.add(layers.ReLU(name='relu_2_global'))
    model.add(layers.MaxPooling2D(name='max_pool_2_global'))

    model.add(layers.Flatten(name='flatten_0_local'))
    model.add(layers.Dense(units=128, name='dense_0_local'))
    model.add(layers.BatchNormalization(name='batch_norm_3_local'))
    model.add(layers.ReLU(name='relu_3_local'))
    model.add(layers.Dense(units=1, activation='sigmoid', name='dense_1_local'))

    return model

# Load Dataset

In [12]:
df = create_pain_df(DATA)

# Original Images

In [13]:
df_original = df[(df['Trans_1'] == 'original') & (df['Trans_2'] == 'straight')]

### Total images

In [14]:
len(df_original)

48106

### Pain Distribution - Whole Dataset

In [15]:
df_pain_levels = pd.DataFrame(np.unique(df_original['Pain'], return_counts=True)).T.rename(columns={0: 'Pain Level', 1: "Count"})
df_pain_levels['%'] = round(df_pain_levels['Count'] / df_pain_levels['Count'].sum(), 3) * 100
df_pain_levels['Count'] = df_pain_levels['Count'].apply("{0:,.0f}".format)
print(df_pain_levels.to_latex(index=False))

\begin{tabular}{rlr}
\toprule
 Pain Level &   Count &     \% \\
\midrule
          0 &  39,846 &  82.8 \\
          1 &   3,100 &   6.4 \\
          2 &   2,281 &   4.7 \\
          3 &   1,408 &   2.9 \\
          4 &     801 &   1.7 \\
          5 &     241 &   0.5 \\
          6 &     265 &   0.6 \\
          7 &      53 &   0.1 \\
          8 &      79 &   0.2 \\
          9 &      32 &   0.1 \\
\bottomrule
\end{tabular}



### Histogram Equilization

In [16]:
# Set PLT Parameters
plt.rcParams.update({'font.size': 14, 
                     'font.family' : 'cmr10', 
                     'font.weight' : 'normal',
                     'axes.titlesize' : 18})

In [18]:
grey = cv2.imread(os.path.join('Dummy Data', '47_0_0_0_0_grey.png'), 0).flatten()
hist_eq = cv2.imread(os.path.join('Dummy Data', '47_0_0_0_0_hist.jpg'), 0).flatten()

AttributeError: 'NoneType' object has no attribute 'flatten'

In [19]:
ax1 = plt.hist(grey, 256)
plt.ylabel('Frequency Count')
plt.xlabel('Pixel Value')
plt.savefig(os.path.join(FIGURES, 'histogram.pdf'), dpi=300)

NameError: name 'grey' is not defined

In [20]:
ax1 = plt.hist(hist_eq, 256)
plt.ylabel('Frequency Count')
plt.xlabel('Pixel Value')
plt.savefig(os.path.join(FIGURES, 'histogram_eq.pdf'), dpi=300)

NameError: name 'hist_eq' is not defined

### Pain vs. No Pain

In [21]:
print(sum(df_original['Pain'] == 0),"|", sum(df_original['Pain'] > 0))

39846 | 8260


### Group 1 vs. Group 2

In [22]:
df_1 = create_pain_df(os.path.join(DATA, "group_1"))
df_2 = create_pain_df(os.path.join(DATA, "group_2"))
print(len(df_1), "|", len(df_2))

102976 | 78180


In [23]:
df_p_1 = pd.DataFrame(df_1['Person'].unique(), columns=['Group 1'])
df_p_2 = pd.DataFrame(df_2['Person'].unique(), columns=['Group 2'])
df_p = pd.concat((df_p_1, df_p_2), sort=False, axis=1).T
print(df_p.to_latex(header=False))

\begin{tabular}{lrrrrrrrrrrrr}
\toprule
Group 1 &  42 &  47 &  49 &  66 &  95 &  97 &  103 &  106 &  108 &  121 &  123 &  124 \\
Group 2 &  43 &  48 &  52 &  59 &  64 &  80 &   92 &   96 &  107 &  109 &  115 &  120 \\
\bottomrule
\end{tabular}



In [24]:
df['Person'].unique()

array([ 42,  43,  47,  48,  49,  52,  59,  64,  66,  80,  92,  95,  96,
        97, 101, 103, 106, 107, 108, 109, 115, 120, 121, 123, 124])

## Person

In [25]:
df_original = df_original.copy()
df_original['Pain'] = np.minimum(df_original['Pain'], 1)
no_pain = df_original.groupby('Person')['Pain'].count() - df_original.groupby('Person')['Pain'].sum()
pain = df_original.groupby('Person')['Pain'].sum()
no_pain.name = 'No Pain'
pain_df = pd.concat((pd.DataFrame(no_pain), pd.DataFrame(pain)), axis=1)
pain_df['% Pain'] = (pain_df['Pain'] / pain_df.sum(axis=1) * 100).astype(int).astype(str) + '%'
pain_df = pain_df.reset_index()
pain_df['Pain'] = pain_df['Pain'].apply("{0:,.0f}".format)
pain_df['No Pain'] = pain_df['No Pain'].apply("{0:,.0f}".format)
pain_1 = pain_df[pain_df['Person'].isin(df_p_1['Group 1'])]
pain_2 = pain_df[pain_df['Person'].isin(df_p_2['Group 2'])]
array = [['Group 1', 'Group 1', 'Group 1', 'Group 1'], list(pain_1.columns.values)]
tuples = list(zip(*array))
idx1 = pd.MultiIndex.from_tuples(tuples)
array = [['Group 2', 'Group 2', 'Group 2', 'Group 2'], list(pain_2.columns.values)]
tuples = list(zip(*array))
idx2 = pd.MultiIndex.from_tuples(tuples)
pain_1.columns = idx1
pain_2.columns = idx2
print(pain_1.to_latex(index=False))
print(pain_2.to_latex(index=False))

\begin{tabular}{rlll}
\toprule
Group 1 \\
 Person & No Pain & Pain & \% Pain \\
\midrule
     42 &   1,895 &  239 &    11\% \\
     47 &   1,544 &   64 &     3\% \\
     49 &   2,194 &  524 &    19\% \\
     66 &   1,947 &  512 &    20\% \\
     95 &     304 &  498 &    62\% \\
     97 &   3,212 &  147 &     4\% \\
    103 &   2,738 &  824 &    23\% \\
    106 &   2,281 &  517 &    18\% \\
    108 &   2,453 &  455 &    15\% \\
    121 &     478 &   40 &     7\% \\
    123 &     822 &  361 &    30\% \\
    124 &     699 &  996 &    58\% \\
\bottomrule
\end{tabular}

\begin{tabular}{rlll}
\toprule
Group 2 \\
 Person & No Pain &   Pain & \% Pain \\
\midrule
     43 &   1,028 &     92 &     8\% \\
     48 &     798 &     84 &     9\% \\
     52 &   2,503 &    106 &     4\% \\
     59 &     640 &    133 &    17\% \\
     64 &   1,394 &    155 &    10\% \\
     80 &     896 &  1,068 &    54\% \\
     92 &   1,031 &    471 &    31\% \\
     96 &   2,175 &    178 &     7\% \\
    107 &   1,599

## Pain Pivot

In [26]:
index = 'Person'
columns = 'Session'
values = index
pivot = DL.create_pivot(os.path.join(DATA, "group_2"), index, columns, values)
pivot['Pain %'] = (pivot['Pain %'] * 100).astype(int).astype(str) + '%'
pivot = pivot.drop('Total')
pivot = pivot.reset_index()
array = [[' ', 'Session', 'Session', 'Session', 'Session', 'Session', 'Session', 'Session', 'Session', 'Session', 'Session', 'Total', 'Total', 'Total', 'Total'], list(pivot.columns.values)]
tuples = list(zip(*array))
index = pd.MultiIndex.from_tuples(tuples)
pivot.columns = index
pivot = pivot.sort_values([(' ', 'Person')], ascending=True)
pivot['Session'] = pivot['Session'][pivot['Session'] != ''].astype(float).applymap("{0:,.0f}".format).replace('nan', '')
pivot[('Total', 'Pain')] = pivot[('Total', 'Pain')].astype(float).apply("{0:,.0f}".format)
pivot[('Total', 'No Pain')] = pivot[('Total', 'No Pain')].astype(float).apply("{0:,.0f}".format)
print(pivot.to_latex(index=False))

\begin{tabular}{rllllllllllrlll}
\toprule
       & \multicolumn{10}{l}{Session} & \multicolumn{4}{l}{Total} \\
Person &       0 &    1 &    2 &    3 &    4 &    5 &    6 & 7 &    8 &    9 & \# of Sessions &   Pain & No Pain & Pain \% \\
\midrule
    43 &     140 &      &      &      &  228 &      &      &   &      &      &             9 &    368 &   4,112 &     8\% \\
    48 &         &  148 &      &      &      &  188 &      &   &      &      &             7 &    336 &   3,192 &    10\% \\
    52 &      72 &      &      &      &      &      &   44 &   &  120 &  188 &            10 &    424 &  10,012 &     4\% \\
    59 &         &  532 &      &      &      &      &      &   &      &      &             2 &    532 &   2,560 &    17\% \\
    64 &     244 &   64 &   64 &      &  248 &      &      &   &      &      &             6 &    620 &   5,576 &    10\% \\
    80 &   1,052 &  536 &  484 &  484 &  660 &  792 &  264 &   &      &      &             7 &  4,272 &   3,584 &    54\% \\
    

## Model Architectures

In [27]:
PADDING = 'same'
BATCH_NORM = True
RELU = True
MAX_POOL = True
GLOB_MAX_POOL = True
STRIDE = (1, 1)

df, stdout = model_summary()
sys.stdout = stdout
print(df.to_latex(index=False))

Setting up CNN
\begin{tabular}{lll}
\toprule
         Layer Type &                              Output Shape &    Param \# \\
\midrule
              Input &                       (None, 215, 215, 1) &          0 \\
             Conv2D &  (None, 108, 108, 32)                     &        832 \\
 BatchNormalization &  (None, 108, 108, 32)                     &        128 \\
               ReLU &  (None, 108, 108, 32)                     &          0 \\
             Conv2D &  (None, 54, 54, 64)                       &     51,264 \\
 BatchNormalization &  (None, 54, 54, 64)                       &        256 \\
               ReLU &  (None, 54, 54, 64)                       &          0 \\
             Conv2D &  (None, 27, 27, 128)                      &    204,928 \\
 BatchNormalization &  (None, 27, 27, 128)                      &        512 \\
               ReLU &  (None, 27, 27, 128)                      &          0 \\
       MaxPooling2D &  (None, 13, 13, 128)                      &

## Performance Tables

In [28]:
# Define Variables
pivot = DL.create_pivot(os.path.join(DATA, 'group_2'), 'Session', 'Person', 'Session')
subjects = DL.create_pain_df(os.path.join(DATA, 'group_2'))['Person'].unique()

In [29]:
# Define experiment names
exp_names = {
    '0-sessions-Baseline-random': 'RANDOM',
    '0-sessions-Baseline-central-pre-training': 'BC-CNN',
    '0-sessions-Baseline-federated-pre-training': 'BF-CNN',
    '1-sessions-Centralized-no-pre-training': 'C-CNN (N)',
    '10-sessions-Federated-central-pre-training-local-models': 'FL-CNN (C)',
    '11-sessions-Federated-federated-pre-training-local-models': 'FL-CNN (F)',
    '2-sessions-Centralized-pre-training': 'C-CNN (C)',
    '3-sessions-Federated-no-pre-training': 'F-CNN (N)',
    '4-sessions-Federated-central-pre-training': 'F-CNN (C)',
    '5-sessions-Federated-federated-pre-training': 'F-CNN (F)',
    '6-sessions-Federated-no-pre-training-personalization': 'FP-CNN (N)',
    '7-sessions-Federated-central-pre-training-personalization': 'FP-CNN (C)',
    '8-sessions-Federated-federated-pre-training-personalization': 'FP-CNN (F)',
    '9-sessions-Federated-no-pre-training-local-models': 'FL-CNN (N)'
}

In [30]:
# Define top experiments
top_exp = ['RANDOM', 'BC-CNN', 'C-CNN (C)', 'F-CNN (C)', 'FP-CNN (C)', 'FL-CNN (C)']

In [31]:
results = rE.compute_average_metrics('person', subjects, pivot, RESULTS)
overview_table = rE.generate_overview_table(results, exp_names)

### Overall

In [32]:
print(overview_table.to_latex(index=False))

\begin{tabular}{llll}
\toprule
            & \multicolumn{3}{l}{Weighted AVG + STD} \\
 Experiment &                ACC &   PR-AUC &       F1 \\
\midrule
     BC-CNN &            73 ± 12 &  54 ± 23 &  47 ± 24 \\
     BF-CNN &            74 ± 12 &  53 ± 23 &  43 ± 21 \\
     RANDOM &            44 ± 15 &  31 ± 16 &   32 ± 2 \\
  C-CNN (N) &            69 ± 17 &  49 ± 23 &  39 ± 25 \\
  C-CNN (C) &            75 ± 13 &  58 ± 21 &  50 ± 22 \\
  F-CNN (N) &            66 ± 16 &  49 ± 23 &  43 ± 27 \\
  F-CNN (C) &            75 ± 11 &  59 ± 23 &  52 ± 25 \\
  F-CNN (F) &            76 ± 12 &  59 ± 23 &  49 ± 25 \\
 FP-CNN (N) &            69 ± 18 &  43 ± 19 &  34 ± 25 \\
 FP-CNN (C) &            76 ± 12 &  56 ± 21 &  50 ± 24 \\
 FP-CNN (F) &            76 ± 13 &  55 ± 22 &  44 ± 24 \\
 FL-CNN (N) &            69 ± 18 &  43 ± 18 &  34 ± 26 \\
 FL-CNN (C) &            75 ± 13 &  55 ± 21 &  47 ± 23 \\
 FL-CNN (F) &            75 ± 14 &  54 ± 21 &  42 ± 23 \\
\bottomrule
\end{tabular}



In [33]:
# Print top experiments
print(overview_table[overview_table[('', 'Experiment')].isin(top_exp)].to_latex(index=False))

\begin{tabular}{llll}
\toprule
            & \multicolumn{3}{l}{Weighted AVG + STD} \\
 Experiment &                ACC &   PR-AUC &       F1 \\
\midrule
     BC-CNN &            73 ± 12 &  54 ± 23 &  47 ± 24 \\
     RANDOM &            44 ± 15 &  31 ± 16 &   32 ± 2 \\
  C-CNN (C) &            75 ± 13 &  58 ± 21 &  50 ± 22 \\
  F-CNN (C) &            75 ± 11 &  59 ± 23 &  52 ± 25 \\
 FP-CNN (C) &            76 ± 12 &  56 ± 21 &  50 ± 24 \\
 FL-CNN (C) &            75 ± 13 &  55 ± 21 &  47 ± 23 \\
\bottomrule
\end{tabular}



## Per Person

In [34]:
# Generate Header for LaTex table
# header = ['ACC',  43,  48,  52,  59,  64,  80,  92,  96,  107,  109,  115,  120, 'wt. Mean ± SD']
header = ['ACC',  1, 2, 3, 4, 5, 6, 7, 8, 9, 'wt. Mean ± SD']
formatter = ['\\textbf{' + '{}'.format(elem) + '}' for elem in header]
formatted_header = ''
for elem in formatter:
    formatted_header = formatted_header + elem + ' &  '
print(formatted_header)

\textbf{ACC} &  \textbf{1} &  \textbf{2} &  \textbf{3} &  \textbf{4} &  \textbf{5} &  \textbf{6} &  \textbf{7} &  \textbf{8} &  \textbf{9} &  \textbf{wt. Mean ± SD} &  


In [35]:
results_p = rE.compute_average_metrics('person', subjects, pivot, RESULTS)

In [61]:
print(rE.prepare_top_experiments(results_p['accuracy'], exp_names, top_exp).to_latex(index=False))
print(rE.prepare_top_experiments(results_p['pr'], exp_names, top_exp).to_latex(index=False))
tab = rE.prepare_top_experiments(results_p['f1_score'], exp_names, top_exp)
cols = [col for col in results_p['f1_score'].columns.values if type(col) is int]
tab['Mean ± SD'] = tab[cols].mean(axis=1).round(0).astype(int).astype(str) + ' ± ' + tab[cols].std(axis=1).round(0).astype(int).astype(str)
print(tab.to_latex(index=False))

\begin{tabular}{lrrrrrrrrrrrrl}
\toprule
 Experiment &  43 &  48 &  52 &  59 &  64 &  80 &  92 &  96 &  107 &  109 &  115 &  120 & Mean ± SD \\
\midrule
     BC-CNN &  70 &  78 &  92 &  48 &  90 &  57 &  68 &  79 &   64 &   76 &   70 &   72 &   72 ± 13 \\
     RANDOM &  44 &  44 &  38 &  46 &  37 &  49 &  53 &  41 &   49 &   41 &   38 &   45 &   44 ± 16 \\
  C-CNN (C) &  79 &  78 &  81 &  48 &  91 &  62 &  72 &  84 &   68 &   77 &   88 &   67 &   75 ± 12 \\
  F-CNN (C) &  71 &  78 &  91 &  48 &  92 &  61 &  72 &  84 &   69 &   71 &   74 &   66 &   73 ± 13 \\
 FP-CNN (C) &  82 &  78 &  87 &  48 &  91 &  62 &  78 &  84 &   67 &   77 &   90 &   73 &   76 ± 13 \\
 FL-CNN (C) &  83 &  78 &  86 &  48 &  92 &  61 &  66 &  84 &   68 &   77 &   90 &   71 &   75 ± 13 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrrrrrrrrrrrl}
\toprule
 Experiment &  43 &  48 &  52 &  59 &  64 &  80 &  92 &  96 &  107 &  109 &  115 &  120 & Mean ± SD \\
\midrule
     BC-CNN &  70 &  27 &  39 &  62 &  39 &  65 &

### Ranking Table

In [37]:
df_acc = pd.DataFrame(columns=['Accuracy'], data=(rE.prepare_top_experiments(results_p['accuracy'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1)) / 12
df_pr = pd.DataFrame(columns=['PR-AUC'], data=(rE.prepare_top_experiments(results_p['pr'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1)) / 12
df_f1 = pd.DataFrame(columns=['F1'], data=(rE.prepare_top_experiments(results_p['f1_score'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1)) / 12
print(pd.concat((df_acc, df_pr, df_f1), axis=1).reset_index().to_latex(index=False))

KeyError: "['wt. Mean ± SD'] not found in axis"

## Per Session

In [38]:
results_s = rE.compute_average_metrics('session', subjects, pivot, RESULTS)

In [63]:
print(rE.prepare_top_experiments(results_s['accuracy'], exp_names, top_exp).to_latex(index=False))
print(rE.prepare_top_experiments(results_s['pr'], exp_names, top_exp).to_latex(index=False))
tab = rE.prepare_top_experiments(results_s['f1_score'], exp_names, top_exp)
cols = [col for col in results_s['f1_score'].columns.values if type(col) is int]
tab['Mean ± SD'] = tab[cols].mean(axis=1).round(0).astype(int).astype(str) + ' ± ' + tab[cols].std(axis=1).round(0).astype(int).astype(str)
print(tab.to_latex(index=False))

\begin{tabular}{lrrrrrrlrrl}
\toprule
 Experiment &   1 &   2 &   3 &   4 &   5 &   6 &   7 &   8 &   9 & Mean ± SD \\
\midrule
     BC-CNN &  68 &  63 &  77 &  72 &  68 &  69 &  NA &  89 &  96 &   75 ± 12 \\
     RANDOM &  47 &  43 &  44 &  45 &  44 &  44 &  NA &  37 &  39 &   43 ± 12 \\
  C-CNN (C) &  68 &  74 &  82 &  78 &  64 &  79 &  NA &  81 &  81 &    76 ± 7 \\
  F-CNN (C) &  68 &  73 &  78 &  74 &  61 &  80 &  NA &  88 &  96 &   77 ± 11 \\
 FP-CNN (C) &  68 &  76 &  83 &  78 &  65 &  80 &  NA &  85 &  89 &    78 ± 8 \\
 FL-CNN (C) &  68 &  74 &  84 &  75 &  64 &  79 &  NA &  84 &  88 &    77 ± 8 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrrrrrlrrl}
\toprule
 Experiment &   1 &   2 &   3 &   4 &   5 &   6 &   7 &   8 &   9 & Mean ± SD \\
\midrule
     BC-CNN &  53 &  56 &  51 &  73 &  39 &  48 &  NA &  10 &  80 &   51 ± 22 \\
     RANDOM &  41 &  34 &  26 &  36 &  35 &  28 &  NA &   7 &  10 &   27 ± 13 \\
  C-CNN (C) &  53 &  68 &  56 &  78 &  38 &  48 &  NA &  14 &  79 &  

In [47]:
rE.prepare_top_experiments(results_s['accuracy'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BC-CNN,1.0,1.0,1.0,1.0,4.0,1.0,0.0,5.0,4.0
RANDOM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
C-CNN (C),1.0,3.0,3.0,4.0,2.0,2.0,0.0,1.0,1.0
F-CNN (C),1.0,2.0,2.0,2.0,1.0,3.0,0.0,4.0,4.0
FP-CNN (C),1.0,4.0,4.0,4.0,3.0,3.0,0.0,3.0,3.0
FL-CNN (C),1.0,3.0,5.0,3.0,2.0,2.0,0.0,2.0,2.0


In [48]:
df_acc = pd.DataFrame(columns=['Accuracy'], data=(rE.prepare_top_experiments(results_s['accuracy'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1))
df_pr = pd.DataFrame(columns=['PR-AUC'], data=(rE.prepare_top_experiments(results_s['pr'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1))
df_f1 = pd.DataFrame(columns=['F1'], data=(rE.prepare_top_experiments(results_s['f1_score'], exp_names, top_exp).set_index('Experiment').drop('wt. Mean ± SD', axis=1).rank(method='dense') - 1).sum(axis=1))
print(pd.concat((df_acc, df_pr, df_f1), axis=1).astype(int).reset_index().to_latex(index=False))

\begin{tabular}{lrrr}
\toprule
 Experiment &  Accuracy &  PR-AUC &  F1 \\
\midrule
     BC-CNN &        18 &      15 &  15 \\
     RANDOM &         0 &       0 &   7 \\
  C-CNN (C) &        17 &      24 &  17 \\
  F-CNN (C) &        19 &      27 &  24 \\
 FP-CNN (C) &        25 &      20 &  20 \\
 FL-CNN (C) &        20 &      15 &  14 \\
\bottomrule
\end{tabular}



## Training Metrics Plots

In [49]:
seed = 132
experiment = '{} - Seed {}'.format(seed, seed)

### Prepare variables

#### Optional: Concatenate results and prepare for plotting

In [50]:
# Concatenate results if necessary
# for seed in range(123, 133):
#     experiment = '{} - Seed {}'.format(seed, seed)
#     experiment_folder = os.path.join(RESULTS, experiment)
#     plotting_folder = os.path.join(experiment_folder, 'Plotting')
#     rE.concat_validation_metrics(experiment_folder)
#     for file in os.listdir(plotting_folder):
#         old = os.path.join(plotting_folder, file)
#         date, _, file = file.split('_')
#         new = os.path.join(plotting_folder, file)
#         os.rename(old, new)

In [51]:
# Define top experiments
top_exper = {'2-sessions-Centralized-pre-training.xlsx': 'C-CNN (C)', 
           '4-sessions-Federated-central-pre-training.xlsx': 'F-CNN (C)', 
           '7-sessions-Federated-central-pre-training-personalization.xlsx': 'FP-CNN (C)',
           '10-sessions-Federated-central-pre-training-local-models.xlsx': 'FL-CNN (C)'}

In [52]:
# Set PLT Parameters
plt.rcParams.update({'font.size': 19, 
                     'font.family' : 'cmr10', 
                     'font.weight' : 'normal',
                     'axes.titlesize' : 22})

In [53]:
# Find and sort file paths
file_paths = [os.path.join(plotting_folder, file) for file in sorted(os.listdir(plotting_folder)) if file in top_exper]
order = [int(file_path.split('Plotting/')[1].split('-')[0]) for file_path in file_paths]
file_paths = [path for _, path in sorted(zip(order, file_paths))]

NameError: name 'plotting_folder' is not defined

In [54]:
for key in top_exper.keys():
    df = pd.DataFrame()
    for seed in range(123, 133):
        experiment = '{} - Seed {}'.format(seed, seed)
        experiment_folder = os.path.join(RESULTS, experiment)
        plotting_folder = os.path.join(experiment_folder, 'Plotting')
        for file in os.listdir(plotting_folder):
            if file == key:
                df = pd.concat((df, pd.read_excel(os.path.join(plotting_folder, file))), sort=False)
    df.groupby(['Session', 'Epoch']).mean().reset_index().to_excel(os.path.join(RESULTS, '999 - Sd Summary', 'MEAN_' + key))
    df.groupby(['Session', 'Epoch']).std().reset_index().to_excel(os.path.join(RESULTS, '999 - Sd Summary', 'STD_' + key))

In [55]:
# Find and sort summary file paths
mean_files = [os.path.join(RESULTS, '999 - Sd Summary', file) for file in os.listdir(os.path.join(RESULTS, '999 - Sd Summary')) if 'MEAN' in file]
sd_files = [os.path.join(RESULTS, '999 - Sd Summary', file) for file in os.listdir(os.path.join(RESULTS, '999 - Sd Summary')) if 'STD' in file]
order_mean = [int(file_path.split('MEAN_')[1].split('-')[0]) for file_path in mean_files]
order_sd = [int(file_path.split('STD_')[1].split('-')[0]) for file_path in sd_files]
mean_files = [path for _, path in sorted(zip(order_mean, mean_files))]
sd_files = [path for _, path in sorted(zip(order_sd, sd_files))]

### Accuracy: Train / Val

In [56]:
fig, ax = create_simple_grid(file_paths, np.arange(0,1.1,0.2), 'accuracy', top_exper)
fig.savefig(os.path.join(FIGURES, 'trainingaccuracy_{}.pdf'.format(seed)), bbox_inches = 'tight')

NameError: name 'file_paths' is not defined

### Loss: Train / Val

In [None]:
fig, ax = create_simple_grid(file_paths, np.arange(0,2.1,0.2), 'loss', top_exper)
fig.savefig(os.path.join(FIGURES, 'trainingloss_{}.pdf'.format(seed)), bbox_inches = 'tight')

## Create AVG Plot

### Loss: Train/Val

In [None]:
fig, ax = create_avg_grid(mean_files, sd_files, np.arange(0,2.1,0.2), 'loss', top_exper)
fig.savefig(os.path.join(FIGURES, 'trainingloss_MEAN.pdf'.format(seed)), bbox_inches = 'tight')

### Accuracy: Train/Val

In [None]:
fig, ax = create_avg_grid(mean_files, sd_files, np.arange(0,1.1,0.2), 'accuracy', top_exper)
fig.savefig(os.path.join(FIGURES, 'trainingaccuracy_MEAN.pdf'.format(seed)), bbox_inches = 'tight')

## Share of Pain Level 1

In [None]:
# Set PLT Parameters
plt.rcParams.update({'font.size': 18, 
                     'font.family' : 'cmr10', 
                     'font.weight' : 'normal',
                     'axes.titlesize' : 22})

In [None]:
fig, ax = draw_pain_bar_chart('Session')
fig.savefig(os.path.join(FIGURES, 'painbar.pdf'), bbox_inches = 'tight')

In [None]:
fig, ax = draw_pain_bar_chart('Person')
fig.savefig(os.path.join(FIGURES, 'painbar_person.pdf'), bbox_inches = 'tight')

## Confusion Matrix

In [None]:
conf_matrix = {}
model_types = ['PAIN_{}'.format(i) for i in [2, 4, 7, 10]]
sessions = [6]
subjects = [52, 80, 96]
for model_type in model_types:
    for session in sessions:
        for subject in subjects:
            conf_matrix[(model_type, session, subject)] = create_avg_conf_matrix(model_type, session, subject)

In [None]:
for model_type in model_types:
    print()
    print(model_type)
    print(conf_matrix[(model_type, 6, 80)])