In [1]:
import pandas as pd
import numpy as np
import os, sys
import cv2
import matplotlib.pyplot as plt
import copy
import tensorflow as tf
from io import StringIO # Python3 use: from io import StringIO
import seaborn as sns

models = tf.keras.models  # like 'from tensorflow.keras import models' (PyCharm import issue workaround)
layers = tf.keras.layers  # like 'from tensorflow.keras import layers' (PyCharm import issue workaround)

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

DATA = os.path.join(module_path, 'Data', 'Augmented Data')
RESULTS = os.path.join(module_path, 'Results', 'Thesis')
FIGURES = os.path.join(module_path, 'Figures', 'Thesis')
from Scripts import Data_Loader_Functions as dL
from Scripts import Model_Architectures as mA
from Scripts import Results_Evaluation as rE
from Scripts import Print_Functions as pF

In [2]:
GROUP_2_PATH = os.path.join(DATA, "group_2")
CENTRAL_MODELS = os.path.join(module_path, 'Pain', 'Centralized')
FEDERATED_MODELS = os.path.join(module_path, 'Pain', 'Federated')

# Add Session 1 to files and remove session 0 from BCDL

In [5]:
results_path = os.path.join(module_path, 'Results', 'Paper')

In [39]:
BCDL_df = pd.read_csv(os.path.join(results_path, '20200223 - BCDL_all.csv')).drop(columns=['Unnamed: 0'])
CDL_df = pd.read_csv(os.path.join(results_path, '20200223 - CDL_all.csv')).drop(columns=['Unnamed: 0'])
FDL_df = pd.read_csv(os.path.join(results_path, '20200223 - FDL_all.csv')).drop(columns=['Unnamed: 0'])
LDL_df = pd.read_csv(os.path.join(results_path, '20200223 - LDL_all.csv')).drop(columns=['Unnamed: 0'])
PFDL_df = pd.read_csv(os.path.join(results_path, '20200223 - PFDL_all.csv')).drop(columns=['Unnamed: 0'])

In [40]:
session_1 = BCDL_df[BCDL_df['Session'] == 1]

In [41]:
BCDL_df = BCDL_df[~(BCDL_df['Session'] == 0)]
CDL_df = CDL_df
FDL_df = pd.concat((FDL_df, session_1.rename(columns={'BCDL': 'FDL'})))
LDL_df = pd.concat((LDL_df, session_1.rename(columns={'BCDL': 'LDL'})))
PFDL_df = pd.concat((PFDL_df, session_1.rename(columns={'BCDL': 'PFDL'})))

In [44]:
BCDL_df.to_csv(os.path.join(results_path, '20200223 - BCDL_all_final.csv'))
CDL_df.to_csv(os.path.join(results_path, '20200223 - CDL_all_final.csv'))
FDL_df.to_csv(os.path.join(results_path, '20200223 - FDL_all_final.csv'))
LDL_df.to_csv(os.path.join(results_path, '20200223 - LDL_all_final.csv'))
PFDL_df.to_csv(os.path.join(results_path, '20200223 - PFDL_all_final.csv'))

# Merge Files

In [55]:
BCDL_df = pd.read_csv(os.path.join(results_path, '20200223 - BCDL_all_final_positive.csv')).drop(columns=['Unnamed: 0.1']).drop(columns=['Unnamed: 0'])
CDL_df = pd.read_csv(os.path.join(results_path, '20200223 - CDL_all_final_positive.csv')).drop(columns=['Unnamed: 0.1']).drop(columns=['Unnamed: 0'])
FDL_df = pd.read_csv(os.path.join(results_path, '20200223 - FDL_all_final_positive.csv')).drop(columns=['Unnamed: 0.1']).drop(columns=['Unnamed: 0'])
LDL_df = pd.read_csv(os.path.join(results_path, '20200223 - LDL_all_final_positive.csv')).drop(columns=['Unnamed: 0.1']).drop(columns=['Unnamed: 0'])
PFDL_df = pd.read_csv(os.path.join(results_path, '20200223 - PFDL_all_final_positive.csv')).drop(columns=['Unnamed: 0.1']).drop(columns=['Unnamed: 0'])

In [70]:
tp = FDL_df.groupby(by=['Seed', 'Subject ID'])['TP'].sum()
tn = FDL_df.groupby(by=['Seed', 'Subject ID'])['TN'].sum()
fp = FDL_df.groupby(by=['Seed', 'Subject ID'])['FP'].sum()
fn = FDL_df.groupby(by=['Seed', 'Subject ID'])['FN'].sum()
pd.DataFrame((tp + tn) / (tp + tn + fp + fn)).T.melt()

Unnamed: 0,Seed,Subject ID,value
0,123,43,0.694611
1,123,48,0.789506
2,123,52,0.919086
3,123,59,0.489810
4,123,64,0.912587
5,123,80,0.624392
6,123,92,0.696678
7,123,96,0.835106
8,123,107,0.728125
9,123,109,0.787677


In [73]:
concat_df = BCDL_df.merge(CDL_df, how='inner', on=['Subject ID', 'Session', 'Frame', 'True Label', 'Seed'])
concat_df = concat_df.merge(FDL_df, how='inner', on=['Subject ID', 'Session', 'Frame', 'True Label', 'Seed'])
concat_df = concat_df.merge(LDL_df, how='inner', on=['Subject ID', 'Session', 'Frame', 'True Label', 'Seed'])
concat_df = concat_df.merge(PFDL_df, how='inner', on=['Subject ID', 'Session', 'Frame', 'True Label', 'Seed'])

In [74]:
len(concat_df)

86179840

In [12]:
concat_df = concat_df[['Seed', 'Session', 'Subject ID', 'Frame', 'True Label', 'BCDL', 'CDL', 'FDL', 'LDL', 'PFDL']]

In [13]:
concat_df = concat_df.sort_values(['Seed', 'Session', 'Subject ID', 'Frame'])

KeyboardInterrupt: 

In [11]:
concat_df

Unnamed: 0,Subject ID,Session,Frame,True Label,BCDL,Seed,CDL,FDL,LDL,PFDL
0,109,2,39,0,0.000071,130,0.023578,0.000458,4.784829e-04,6.506354e-04
1,109,2,39,0,0.000071,130,0.023578,0.000458,4.784829e-04,2.501447e-05
2,109,2,39,0,0.000071,130,0.023578,0.000458,4.784829e-04,6.942935e-04
3,109,2,39,0,0.000071,130,0.023578,0.000458,4.784829e-04,1.808530e-03
4,109,2,39,0,0.000071,130,0.023578,0.000458,2.208471e-05,6.506354e-04
5,109,2,39,0,0.000071,130,0.023578,0.000458,2.208471e-05,2.501447e-05
6,109,2,39,0,0.000071,130,0.023578,0.000458,2.208471e-05,6.942935e-04
7,109,2,39,0,0.000071,130,0.023578,0.000458,2.208471e-05,1.808530e-03
8,109,2,39,0,0.000071,130,0.023578,0.000458,3.751944e-04,6.506354e-04
9,109,2,39,0,0.000071,130,0.023578,0.000458,3.751944e-04,2.501447e-05


In [30]:
concat_df.to_csv('Concatenated.csv')

# Evaluation

In [32]:
df = pd.read_csv('Concatenated.csv')
df['True Label'] = np.minimum(df['True Label'], 1)
columns = ['BCDL', 'CDL', 'FDL', 'LDL', 'PFDL']

In [33]:
res_columns = [col + '_pred' for col in columns]
df = pd.concat((df, (df[columns] > 0.5).astype(int).rename(columns=dict(zip(columns, res_columns)))), axis=1)

In [35]:
from sklearn.metrics.classification import confusion_matrix

In [74]:
conf_m = np.array([confusion_matrix(sd_df['True Label'], sd_df['BCDL_pred']) for _, sd_df in df.groupby('Seed')])

In [82]:
conf_m.mean(axis=0)

array([[13906.9,  2555.1],
       [ 1446.3,  1636.7]])