In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def loadResult(filePath):
    return pd.read_csv(filePath,
                        header=None,
                        skiprows=2,
                        names=['IdScenary','TotalRepeats','TotalFolds','ExecutionNumber','CurrentRepeatNumber', 'CurrentFoldNumber',
                               'DatabaseName','NumProcessUsed','ElipsedTrainingTime','ElipsedPredict1Time','ElipsedPredict2Time',
                               'ErrorPredict1', 'ErrorPredict2'
                               ])


#creating an empty dataframe
dfResults = pd.DataFrame(columns=['IdScenary','TotalRepeats','TotalFolds','ExecutionNumber','CurrentRepeatNumber', 'CurrentFoldNumber',
                               'DatabaseName','NumProcessUsed','ElipsedTrainingTime','ElipsedPredict1Time','ElipsedPredict2Time',
                               'ErrorPredict1', 'ErrorPredict2'
                               ])

#appending results
dfResults = pd.concat([dfResults, loadResult('~/output/output_1-ElectricalFaultDetection-4-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_2-ElectricalFaultDetection-8-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_3-ElectricalFaultDetection-16-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_4-ElectricalFaultDetection-32-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_5-Iris-4-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_6-Iris-8-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_7-Iris-16-30-10.txt')], ignore_index=True)
dfResults = pd.concat([dfResults, loadResult('~/output/output_8-Iris-32-30-10.txt')], ignore_index=True)


#creating categorical to be possible sort dataframe by the amount of observations
dfResults['DatabaseName'] = pd.Categorical(dfResults['DatabaseName'], ["Iris","ElectricalFaultDetection"])

#creating accuracy columns
dfResults["AccuracyPredict1"] = (1 - dfResults["ErrorPredict1"])*100
dfResults["AccuracyPredict2"] = (1 - dfResults["ErrorPredict2"])*100

#sorting by desired values
dfResults = dfResults.sort_values(by=['DatabaseName', 'NumProcessUsed'])

dfResults

Unnamed: 0,IdScenary,TotalRepeats,TotalFolds,ExecutionNumber,CurrentRepeatNumber,CurrentFoldNumber,DatabaseName,NumProcessUsed,ElipsedTrainingTime,ElipsedPredict1Time,ElipsedPredict2Time,ErrorPredict1,ErrorPredict2,AccuracyPredict1,AccuracyPredict2
1200,5,30,10,0,0,0,Iris,4,0.035079,0.000262,0.001783,0.000000,0.000000,100.000000,100.000000
1201,5,30,10,1,0,1,Iris,4,0.010136,0.000184,0.001433,0.000000,0.000000,100.000000,100.000000
1202,5,30,10,2,0,2,Iris,4,0.015308,0.000181,0.001490,0.133333,0.133333,86.666667,86.666667
1203,5,30,10,3,0,3,Iris,4,0.009030,0.000179,0.001364,0.066667,0.066667,93.333333,93.333333
1204,5,30,10,4,0,4,Iris,4,0.013876,0.000178,0.001559,0.066667,0.066667,93.333333,93.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,4,30,10,295,29,5,ElectricalFaultDetection,32,1.943563,0.001010,0.356643,0.005833,0.005000,99.416667,99.500000
1196,4,30,10,296,29,6,ElectricalFaultDetection,32,2.351866,0.001090,0.400895,0.005833,0.004167,99.416667,99.583333
1197,4,30,10,297,29,7,ElectricalFaultDetection,32,2.067167,0.001063,0.370410,0.009167,0.004167,99.083333,99.583333
1198,4,30,10,298,29,8,ElectricalFaultDetection,32,1.845271,0.001065,0.376891,0.008333,0.005833,99.166667,99.416667


In [15]:
dfStatistics = dfResults.describe()

dfStatistics

Unnamed: 0,ElipsedTrainingTime,ElipsedPredict1Time,ElipsedPredict2Time,ErrorPredict1,ErrorPredict2,AccuracyPredict1,AccuracyPredict2
count,2400.0,2400.0,2400.0,2400.0,2400.0,2400.0,2400.0
mean,1.182137,0.000626,0.184122,0.036357,0.031207,96.364339,96.879262
std,1.225232,0.000432,0.183761,0.052496,0.049505,5.249628,4.950528
min,0.003522,0.000171,0.00094,0.0,0.0,66.666667,60.0
25%,0.014475,0.000192,0.001725,0.004167,0.003331,93.333333,93.333333
50%,0.738577,0.000702,0.133259,0.008333,0.006667,99.166667,99.333333
75%,2.176301,0.001041,0.367374,0.066667,0.066667,99.583333,99.666944
max,3.97388,0.002016,0.465058,0.333333,0.4,100.0,100.0


In [16]:
groupedByDatabaseProcessRepeat = dfResults.groupby(
                                ['DatabaseName','NumProcessUsed', 'CurrentRepeatNumber']
                              ).agg(
                                     ElipsedTimeTraining=('ElipsedTrainingTime','sum'),
                                     ElipsedTimePredict1=('ElipsedPredict1Time','sum'),
                                     ElipsedTimePredict2=('ElipsedPredict2Time','sum'),
                                     AccuracyPredict1=('AccuracyPredict1','mean'),
                                     AccuracyPredict2=('AccuracyPredict2','mean')
                                    )

groupedByDatabaseProcessRepeat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ElipsedTimeTraining,ElipsedTimePredict1,ElipsedTimePredict2,AccuracyPredict1,AccuracyPredict2
DatabaseName,NumProcessUsed,CurrentRepeatNumber,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Iris,4,0,0.152997,0.001898,0.016260,92.666667,93.333333
Iris,4,1,0.121468,0.001870,0.016114,94.666667,95.333333
Iris,4,2,0.127223,0.001800,0.016371,92.000000,95.333333
Iris,4,3,0.131016,0.001798,0.016997,95.333333,95.333333
Iris,4,4,0.140571,0.001810,0.016907,93.333333,96.000000
...,...,...,...,...,...,...,...
ElectricalFaultDetection,32,25,19.500702,0.010440,3.724508,99.058382,99.300062
ElectricalFaultDetection,32,26,20.055309,0.010655,3.772384,99.283403,99.258389
ElectricalFaultDetection,32,27,20.396502,0.010566,3.844584,99.258375,99.441715
ElectricalFaultDetection,32,28,20.604824,0.010489,3.834880,99.233354,99.400007


In [17]:
sumarizedByDatabaseProcessRepeat = groupedByDatabaseProcessRepeat.reset_index()

sumarizedByDatabaseProcessRepeat

Unnamed: 0,DatabaseName,NumProcessUsed,CurrentRepeatNumber,ElipsedTimeTraining,ElipsedTimePredict1,ElipsedTimePredict2,AccuracyPredict1,AccuracyPredict2
0,Iris,4,0,0.152997,0.001898,0.016260,92.666667,93.333333
1,Iris,4,1,0.121468,0.001870,0.016114,94.666667,95.333333
2,Iris,4,2,0.127223,0.001800,0.016371,92.000000,95.333333
3,Iris,4,3,0.131016,0.001798,0.016997,95.333333,95.333333
4,Iris,4,4,0.140571,0.001810,0.016907,93.333333,96.000000
...,...,...,...,...,...,...,...,...
235,ElectricalFaultDetection,32,25,19.500702,0.010440,3.724508,99.058382,99.300062
236,ElectricalFaultDetection,32,26,20.055309,0.010655,3.772384,99.283403,99.258389
237,ElectricalFaultDetection,32,27,20.396502,0.010566,3.844584,99.258375,99.441715
238,ElectricalFaultDetection,32,28,20.604824,0.010489,3.834880,99.233354,99.400007


In [19]:
#create table with all data sumarized

sumarizedGroup = sumarizedByDatabaseProcessRepeat.groupby(
                                ['DatabaseName','NumProcessUsed']
                                ).agg({
    'ElipsedTimeTraining': [('Mean', 'mean'),('Std', 'std')],  # Subcolunas dentro de 'Valor1'
    'ElipsedTimePredict1': [('Mean', 'mean'),('Std', 'std')],  # Subcolunas dentro de 'Valor1'
    'ElipsedTimePredict2': [('Mean', 'mean'),('Std', 'std')],  # Subcolunas dentro de 'Valor1'
    'AccuracyPredict1': [('Acuracia', 'mean'), ('Std', 'std')],  # Subcolunas dentro de 'Valor2'
    'AccuracyPredict2': [('Acuracia', 'mean'), ('Std', 'std')]  # Subcolunas dentro de 'Valor2'
     }).rename(columns={'ElipsedTimeTraining': 'Tempo Treinamento',
                   'ElipsedTimePredict1': 'Tempo Predict1',
                   'ElipsedTimePredict2': 'Tempo Predict2',
                   'AccuracyPredict1': 'Acuracia1',
                   'AccuracyPredict2': 'Acuracia2'
                   })

sumarizedGroup[('Tempo Treinamento', 'Mean')] = sumarizedGroup[('Tempo Treinamento', 'Mean')].map(lambda x: f"{x:.2f}")
sumarizedGroup[('Tempo Treinamento', 'Std')] = sumarizedGroup[('Tempo Treinamento', 'Std')].map(lambda x: f"{x:.2f}")
sumarizedGroup[('Tempo Predict1', 'Mean')] = sumarizedGroup[('Tempo Predict1', 'Mean')].map(lambda x: f"{x:.4f}")
sumarizedGroup[('Tempo Predict2', 'Mean')] = sumarizedGroup[('Tempo Predict2', 'Mean')].map(lambda x: f"{x:.4f}")
sumarizedGroup[('Acuracia1', 'Acuracia')] = sumarizedGroup[('Acuracia1', 'Acuracia')].map(lambda x: f"{x:.2f}")
sumarizedGroup[('Acuracia1', 'Std')] = sumarizedGroup[('Acuracia1', 'Std')].map(lambda x: f"{x:.2f}")
sumarizedGroup[('Acuracia2', 'Acuracia')] = sumarizedGroup[('Acuracia2', 'Acuracia')].map(lambda x: f"{x:.2f}")
sumarizedGroup[('Acuracia2', 'Std')] = sumarizedGroup[('Acuracia2', 'Std')].map(lambda x: f"{x:.2f}")


sumarizedGroup



Unnamed: 0_level_0,Unnamed: 1_level_0,Tempo Treinamento,Tempo Treinamento,Tempo Predict1,Tempo Predict1,Tempo Predict2,Tempo Predict2,Acuracia1,Acuracia1,Acuracia2,Acuracia2
Unnamed: 0_level_1,Unnamed: 1_level_1,Mean,Std,Mean,Std,Mean,Std,Acuracia,Std,Acuracia,Std
DatabaseName,NumProcessUsed,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Iris,4,0.14,0.02,0.0018,4.5e-05,0.0167,0.001017,93.73,1.33,94.51,1.4
Iris,8,0.15,0.02,0.0019,2.6e-05,0.0171,0.000707,93.31,1.13,94.33,1.28
Iris,16,0.15,0.02,0.002,5.5e-05,0.0175,0.000931,93.8,0.98,94.49,0.87
Iris,32,0.18,0.02,0.0023,0.000153,0.0192,0.001982,93.29,1.36,94.29,1.17
ElectricalFaultDetection,4,31.67,0.93,0.011,0.000168,3.5943,0.10248,99.2,0.1,99.32,0.1
ElectricalFaultDetection,8,21.44,0.69,0.0101,0.000248,3.6168,0.103481,99.19,0.09,99.37,0.07
ElectricalFaultDetection,16,20.98,0.6,0.0104,0.000359,3.6985,0.087,99.19,0.09,99.36,0.09
ElectricalFaultDetection,32,19.87,0.49,0.0107,0.000282,3.7495,0.068477,99.2,0.08,99.36,0.08


In [20]:
df_tex = sumarizedGroup.to_latex(index=True, escape=False)
print(df_tex)

\begin{tabular}{lllllrlrllll}
\toprule
                         &    & \multicolumn{2}{l}{Tempo Treinamento} & \multicolumn{2}{l}{Tempo Predict1} & \multicolumn{2}{l}{Tempo Predict2} & \multicolumn{2}{l}{Acuracia1} & \multicolumn{2}{l}{Acuracia2} \\
                         &    &              Mean &   Std &           Mean &       Std &           Mean &       Std &  Acuracia &   Std &  Acuracia &   Std \\
DatabaseName & NumProcessUsed &                   &       &                &           &                &           &           &       &           &       \\
\midrule
Iris & 4  &              0.14 &  0.02 &         0.0018 &  0.000045 &         0.0167 &  0.001017 &     93.73 &  1.33 &     94.51 &  1.40 \\
                         & 8  &              0.15 &  0.02 &         0.0019 &  0.000026 &         0.0171 &  0.000707 &     93.31 &  1.13 &     94.33 &  1.28 \\
                         & 16 &              0.15 &  0.02 &         0.0020 &  0.000055 &         0.0175 &  0.000931 &     93.

  df_tex = sumarizedGroup.to_latex(index=True, escape=False)
