In [1]:
import os, sys
import pandas as pd
import numpy as np
from functools import reduce

# Make the parent directory importable so the project's `Scripts` package resolves
# when the notebook is run from its own sub-folder.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Project-local helpers (only importable after the sys.path tweak above)
from Scripts import Data_Loader_Functions as dL
from Scripts import Results_Evaluation as rE
# Folder holding the experiment result files, relative to the parent directory
RESULTS = os.path.join(module_path, 'Results', 'Thesis')
# Folder holding the input data, relative to the parent directory
DATA = os.path.join(module_path, 'Data', 'Augmented Data')

In [38]:
# Inputs shared by the analysis cells below.
# NOTE(review): `pivot` is later compared against '' to mask cells, so it presumably
# holds '' where a subject/session has nothing to evaluate; confirm in dL.create_pivot.
group_dir = os.path.join(DATA, 'group_2')
pivot = dL.create_pivot(group_dir, 'Session', 'Person', 'Session')
subjects = dL.create_pain_df(group_dir)['Person'].unique()
metric, view_by = 'accuracy', 'person'

In [9]:
def results_table(experiment_path, metric, view_by, subjects, pivot):
    """Build a summary table with one row per experiment found in a results folder.

    Every regular file in ``experiment_path`` whose name contains ``'PAIN_'`` is read
    as a CSV, reduced to a single row by ``weighted_mean_SD`` and labelled with the
    part of its file name between ``'PAIN_'`` and ``'_TEST'``.

    Parameters
    ----------
    experiment_path : str
        Directory containing the result CSV files (sub-directories are skipped).
    metric : str
        Metric suffix used in the per-subject column names, e.g. ``'accuracy'``.
    view_by : str
        ``'session'`` / ``'sessions'`` aggregates per session; any other value
        aggregates per subject.
    subjects : iterable
        Subject identifiers used to build the per-subject column names.
    pivot : pandas.DataFrame
        Pivot table forwarded unchanged to ``weighted_mean_SD``.

    Returns
    -------
    pandas.DataFrame
        Indexed by experiment name and ordered by the leading experiment number.
        Contains the columns produced by ``weighted_mean_SD`` plus the unweighted
        ``'Mean'`` and ``'SD'`` over the integer-labelled columns. An empty frame is
        returned when the folder holds no matching result file.
    """
    # Explicit membership test: the former substring check (`view_by in 'sessions'`)
    # also switched session mode on for '' and for fragments such as 'ss'.
    sessions = view_by in ('session', 'sessions')

    # Order by the name part after 'PAIN_' (experiment name) rather than by the
    # leading timestamp. Requiring 'PAIN_' (with underscore) keeps the split below
    # from failing on names that merely contain 'PAIN'.
    result_names = sorted(
        (name for name in os.listdir(experiment_path) if 'PAIN_' in name),
        key=lambda name: (name.split('PAIN_')[1], name),
    )

    # Collect one single-row frame per experiment and concatenate once at the end
    rows = []
    for name in result_names:
        full_path = os.path.join(experiment_path, name)
        if not os.path.isfile(full_path):
            # Directories can share the experiment name; only files are results
            continue

        df = pd.read_csv(full_path).rename(columns={'Unnamed: 0': 'Epoch'})

        # Weighted mean per subject / session, plus overall weighted mean and SD
        df_mean = weighted_mean_SD(df, subjects, metric, sessions, pivot)
        df_mean['Experiment'] = name.split('PAIN_')[1].split('_TEST')[0]
        rows.append(df_mean)

    if not rows:
        # Nothing to summarise; previously this ended in a KeyError on 'Experiment'
        return pd.DataFrame()

    df_concat = pd.concat(rows).set_index('Experiment')

    # Sort rows by the experiment number that prefixes each name. A stable sort keeps
    # the name order established above for experiments sharing a number.
    experiment_numbers = [int(label.split('-')[0]) for label in df_concat.index]
    df_concat = (
        df_concat
        .assign(indexNumber=experiment_numbers)
        .sort_values(by='indexNumber', kind='mergesort')
        .drop(columns='indexNumber')
    )

    # Unweighted mean / SD across the integer-labelled (subject or session) columns
    cols = [
        col for col in df_concat.columns.values
        if isinstance(col, (int, np.integer)) and not isinstance(col, bool)
    ]
    df_concat['Mean'] = df_concat[cols].mean(axis=1)
    df_concat['SD'] = df_concat[cols].std(axis=1)
    return df_concat

In [79]:
def weighted_mean_SD(df, subjects, metric, sessions, pivot):
    """Aggregate a metric per subject (or per session), weighted by example counts.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per session with the columns ``subject_<id>_<metric>`` and
        ``subject_<id>_{true,false}_{positives,negatives}`` for every subject.
    subjects : iterable
        Subject identifiers used to build the column names.
    metric : str
        Metric suffix of the columns to aggregate, e.g. ``'accuracy'``.
    sessions : bool
        If True, aggregate per session (across subjects) instead of per subject.
    pivot : pandas.DataFrame
        Forwarded to ``mask_df``, which blanks the weights wherever the pivot
        holds an empty string.

    Returns
    -------
    pandas.DataFrame
        A single row holding one weighted average per subject (or per 1-based
        session number) followed by ``'Weighted Mean'`` and ``'Weighted SD'``.
    """
    # Number of evaluated examples (TP + TN + FP + FN) per subject and session
    df_total = session_examples_total(df, subjects, metric)

    # Masked example counts serve as weights; the 'Session' column is not a weight
    weights = mask_df(df_total, metric, subjects, pivot).drop('Session', axis=1)

    # Column names of the format "subject_43_accuracy"
    columns = ['subject_{}_{}'.format(subject, metric) for subject in subjects]

    # Metric per subject and session, with columns relabelled to the subject number
    df_new = df[columns]
    df_new = df_new.rename(columns={col: int(col.split('_')[1]) for col in df_new.columns})

    # For the per-session view the same computation runs on the transposed frames
    if sessions:
        weights = weights.T
        df_new = df_new.T

    # Weighted average per column, e.g. ACC [1.0, 0.5] with 10 and 5 observations
    # gives 12.5 / 15 == 0.833
    weighted = df_new * weights
    total_weight = weights.sum().sum()
    weighted_avg = weighted.sum() / weights.sum()

    # The variance must be computed while `weighted_avg` still carries the same labels
    # as the columns of `df_new`. Relabelling the sessions to 1-based numbers (or
    # appending summary entries) first would make the subtraction align every column
    # with the average of a different session.
    variance = ((df_new - weighted_avg) ** 2 * weights).sum().sum() / total_weight

    # Overall mean across all subjects and sessions, weighted by observations
    overall_mean = weighted.sum().sum() / total_weight

    if sessions:
        # Report sessions with 1-based numbers
        weighted_avg.index = weighted_avg.index + 1

    weighted_avg['Weighted Mean'] = overall_mean
    weighted_avg['Weighted SD'] = np.sqrt(variance)

    # Single-row frame: per-subject (or per-session) values followed by the summary
    return pd.DataFrame(weighted_avg).T

In [56]:
def mask_df(df, metric, subjects, pivot):
    """Blank out the entries of ``df`` for which ``pivot`` holds an empty string.

    The ``subject_<id>_<metric>`` columns of ``df`` are selected and relabelled to
    the integer subject id, and a 1-based ``'Session'`` column is derived from the
    row index. ``pivot`` is reduced to the same columns, its row labelled 0 is
    dropped, and the remaining rows are renumbered from zero so both frames line up.

    Returns a frame with the columns ``['Session', <id>, ...]`` whose values are
    kept where the pivot differs from ``''`` and are NaN elsewhere.
    """
    wanted = ['subject_{}_{}'.format(subject, metric) for subject in subjects]

    # Expose the zero-based row index as a column and shift it to 1-based numbering
    totals = df[wanted].reset_index()
    totals['index'] = totals['index'] + 1
    totals = totals.rename(columns={'index': 'Session'})

    # 'subject_43_accuracy' -> 43
    id_by_name = {name: int(name.split('_')[1]) for name in totals.columns if 'subject' in name}
    totals = totals.rename(columns=id_by_name)

    # Bring the pivot into the same layout: flat index, same columns, no row 0,
    # zero-based row labels
    presence = pivot.reset_index()
    presence = presence[totals.columns]
    presence = presence.drop(0)
    presence = presence.reset_index(drop=True)

    keep = presence != ''
    return totals.where(keep)

In [8]:
def session_examples_total(df, subjects, metric):
    """Sum TP, TN, FP and FN per subject for every row of ``df``.

    For each subject the four columns ``subject_<id>_true_positives``,
    ``..._true_negatives``, ``..._false_positives`` and ``..._false_negatives`` are
    added element-wise. The result is stored under ``subject_<id>_<metric>``, so the
    returned frame uses the same column names as the metric columns of ``df``.
    """
    count_templates = (
        'subject_{}_true_positives',
        'subject_{}_true_negatives',
        'subject_{}_false_positives',
        'subject_{}_false_negatives',
    )

    totals = pd.DataFrame()
    for subject in subjects:
        first, *remaining = [template.format(subject) for template in count_templates]
        examples = df[first]
        for name in remaining:
            examples = examples + df[name]
        totals['subject_{}_{}'.format(subject, metric)] = examples
    return totals

In [12]:
experiment_path = os.path.join(RESULTS, 'Original', '123 - Seed 123')

# Step-by-step replay of the listing logic used in results_table:
# keep the entries that mention 'PAIN' and order them by the name part after
# 'PAIN_' (experiment name) instead of by the leading timestamp.
df_concat = pd.DataFrame()
list_dir = [entry for entry in os.listdir(experiment_path) if 'PAIN' in entry]
f_paths = [entry.split("PAIN_")[1] for entry in list_dir]
folders = [entry for _, entry in sorted(zip(f_paths, list_dir))]

In [13]:
# Inspect the name-sorted directory entries (files and same-named folders)
folders

['2019-08-30-144543_PAIN_0-sessions-Baseline-central-pre-training_TEST.csv',
 '2019-08-30-144716_PAIN_0-sessions-Baseline-federated-pre-training_TEST.csv',
 '2019-09-02-100743_PAIN_0-sessions-Baseline-random_TEST.csv',
 '2019-08-30_PAIN_1-sessions-Centralized-no-pre-training',
 '2019-08-30-074557_PAIN_1-sessions-Centralized-no-pre-training_TEST.csv',
 '2019-08-30_PAIN_10-sessions-Federated-central-pre-training-local-models',
 '2019-08-30-141715_PAIN_10-sessions-Federated-central-pre-training-local-models_TEST.csv',
 '2019-08-30_PAIN_11-sessions-Federated-federated-pre-training-local-models',
 '2019-08-30-144413_PAIN_11-sessions-Federated-federated-pre-training-local-models_TEST.csv',
 '2019-08-30_PAIN_2-sessions-Centralized-pre-training',
 '2019-08-30-080348_PAIN_2-sessions-Centralized-pre-training_TEST.csv',
 '2019-08-30_PAIN_3-sessions-Federated-no-pre-training',
 '2019-08-30-084835_PAIN_3-sessions-Federated-no-pre-training_TEST.csv',
 '2019-08-30_PAIN_4-sessions-Federated-central-pr

In [17]:
# Load the first listed result file and show which path was read
first_result_path = os.path.join(experiment_path, folders[0])
df = pd.read_csv(first_result_path).rename(columns={'Unnamed: 0': 'Epoch'})
first_result_path

'/Users/nico/PycharmProjects/FederatedLearning/Results/Thesis/Original/123 - Seed 123/2019-08-30-144543_PAIN_0-sessions-Baseline-central-pre-training_TEST.csv'

In [35]:
# Total number of evaluated examples (TP + TN + FP + FN) per subject and session.
# `columns` is built in this cell so it runs on a fresh kernel instead of relying on
# a definition made in a later cell.
columns = ['subject_{}_{}'.format(subject, metric) for subject in subjects]
df_total = session_examples_total(df, subjects, metric)[columns].reset_index()

# Zero-based row index -> 1-based 'Session' column
df_total['index'] += 1
df_total = df_total.rename(columns={'index': 'Session'})

# 'subject_43_accuracy' -> 43; the mapping is derived from df_total's own columns
df_total = df_total.rename(
    columns={col: int(col.split('_')[1]) for col in df_total.columns if 'subject' in col})
df_total

Unnamed: 0,Session,43,48,52,59,64,80,92,96,107,109,115,120
0,1,644.0,696.0,660.0,1472.0,1228.0,960.0,788.0,900.0,900.0,1260.0,692.0,1168.0
1,2,652.0,168.0,844.0,,1152.0,1408.0,900.0,1228.0,1140.0,872.0,1168.0,840.0
2,3,476.0,648.0,672.0,,568.0,812.0,960.0,1020.0,1052.0,1012.0,724.0,540.0
3,4,668.0,360.0,540.0,,1052.0,1488.0,1500.0,928.0,1648.0,752.0,900.0,300.0
4,5,400.0,924.0,904.0,,660.0,928.0,,1300.0,1352.0,1288.0,,720.0
5,6,496.0,288.0,940.0,,,988.0,,1128.0,572.0,632.0,,628.0
6,7,336.0,,636.0,,,,,868.0,600.0,808.0,,540.0
7,8,396.0,,1872.0,,,,,1140.0,,,,
8,9,,,1872.0,,,,,,,,,


In [51]:
# Bring the pivot into the layout of df_total: flat index, same columns,
# row 0 removed, rows renumbered from zero
pivot_new = pivot.reset_index()
pivot_new = pivot_new[df_total.columns].drop(0)
pivot_new = pivot_new.reset_index(drop=True)

In [57]:
# Keep a total only where the pivot is non-empty; the masked totals are the weights
df_total = df_total.where(pivot_new != '')
weights = df_total.drop(columns='Session')
weights

Unnamed: 0,43,48,52,59,64,80,92,96,107,109,115,120
0,,696.0,,1472.0,1228.0,960.0,788.0,,,1260.0,,
1,,,,,1152.0,1408.0,,,1140.0,,1168.0,
2,,,,,,812.0,,,1052.0,,724.0,540.0
3,668.0,,,,1052.0,1488.0,1500.0,928.0,1648.0,,900.0,
4,,924.0,,,,928.0,,,,1288.0,,
5,,,940.0,,,988.0,,1128.0,,,,
6,,,,,,,,,,,,
7,,,1872.0,,,,,1140.0,,,,
8,,,1872.0,,,,,,,,,


In [61]:
# Metric per subject and session, relabelled from 'subject_43_accuracy' to 43
columns = ['subject_{}_{}'.format(subject, metric) for subject in subjects]
subject_ids = {col: int(col.split('_')[1]) for col in columns}
df_new = df[columns].rename(columns=subject_ids)

In [69]:
# Observation-weighted average per subject, then the overall weighted mean and SD
weighted = df_new * weights
total_weight = weights.sum().sum()

weighted_avg = weighted.sum() / weights.sum()
variance = ((df_new - weighted_avg) ** 2 * weights).sum().sum() / total_weight
std = np.sqrt(variance)

weighted_avg['Weighted Mean'] = weighted.sum().sum() / total_weight
weighted_avg['Weighted SD'] = std

In [66]:
# Element-wise product of metric and weight (NaN wherever the weight is masked)
(df_new * weights)

Unnamed: 0,43,48,52,59,64,80,92,96,107,109,115,120
0,,531.00001,,783.999989,1163.99997,463.999987,683.999993,,,853.000005,,
1,,,,,1092.000023,851.000038,,,526.000012,,640.999991,
2,,,,,,551.999999,,,968.99998,,575.999985,411.000016
3,462.999981,,,,865.999991,1205.000004,846.000016,825.000019,980.000047,,637.000018,
4,,734.000024,,,,291.000006,,,,1173.999994,,
5,,,896.000028,,,561.99999,,635.999991,,,,
6,,,,,,,,,,,,
7,,,1632.000014,,,,,1052.000023,,,,
8,,,1798.999969,,,,,,,,,


In [67]:
# Masked example counts used as weights above
weights

Unnamed: 0,43,48,52,59,64,80,92,96,107,109,115,120
0,,696.0,,1472.0,1228.0,960.0,788.0,,,1260.0,,
1,,,,,1152.0,1408.0,,,1140.0,,1168.0,
2,,,,,,812.0,,,1052.0,,724.0,540.0
3,668.0,,,,1052.0,1488.0,1500.0,928.0,1648.0,,900.0,
4,,924.0,,,,928.0,,,,1288.0,,
5,,,940.0,,,988.0,,1128.0,,,,
6,,,,,,,,,,,,
7,,,1872.0,,,,,1140.0,,,,
8,,,1872.0,,,,,,,,,
