In [37]:
import pandas as pd
# `display` and `HTML` are imported from the public `IPython.display` module: importing `display`
# from `IPython.core.display` is deprecated and fails on current IPython releases.
from IPython.display import HTML, display

# Inject a CSS rule that stretches the notebook's `.container` element to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Column groups of the result CSVs, listed in the order in which they are declared below.
leading_count_cols = ('batch', 'dk_cells', 'training_cells')
ratio_cols = ('clean_dataset_ratio',
              'precision',
              'recall',
              'repairing_recall',
              'f1',
              'repairing_f1')
trailing_count_cols = ('detected_errors',
                       'total_errors',
                       'correct_repairs',
                       'total_repairs',
                       'repairs_on_correct_cells',
                       'repairs_on_incorrect_cells')

# dtype mapping handed to pd.read_csv: count-like columns are int, ratio-like columns are float.
cols_dtype = {col: kind
              for cols, kind in ((leading_count_cols, int),
                                 (ratio_cols, float),
                                 (trailing_count_cols, int))
              for col in cols}

# Format strings handed to Styler.format: every ratio column is rendered with three decimals.
cols_formats = dict.fromkeys(ratio_cols, '{:1.3f}')

# Captions of the compared approaches, numbered from 1 in presentation order.
approach_description_list = [
    '{}) {}'.format(number, summary)
    for number, summary in enumerate(
        ('Baseline - One-Shot Execution',
         'Baseline - Incremental, recomputing statistics from scratch',
         'Proposal - Incremental, maintaining statistics incrementally',
         'Proposal - Incremental, maintaining also the model parameters incrementally',
         'Proposal - Incremental, skipping the training phase for some batches',
         'Proposal - Incremental, performing training without repeating cells already used for training',
         'Proposal - Incremental, without trying to repair cells already analyzed in previous batches'),
        start=1)]

In [38]:
####################################################################################################
# REPAIRING QUALITY: Results for one-shot execution.
####################################################################################################

dir_name = 'hospital/one_shot'
baseline_1_csv = dir_name + '/repairing_quality/baseline_1.csv'

# The file is semicolon-separated; its first column is used as the index.
df_baseline_1 = pd.read_csv(baseline_1_csv, sep=';', index_col=0, dtype=cols_dtype)

# Styler that applies the per-column format strings from `cols_formats`.
df_baseline_1_view = df_baseline_1.style.format(cols_formats)

# Caption first, then the styled table as the cell's displayed value.
print(approach_description_list[0])
df_baseline_1_view

1) Baseline - One-Shot Execution


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,11265,6592,0.984,1.0,0.456,0.533,0.626,0.696,435,509,232,232,0,232


In [39]:
####################################################################################################
# REPAIRING QUALITY: Results for 2 batches.
####################################################################################################

dir_name = 'hospital/2_batches'

# One CSV per incremental approach lives under <dir_name>/repairing_quality/. They are all read
# with the same options (semicolon-separated, first column as index), so a single read_csv call
# is applied to each file name instead of repeating the call six times.
(df_2b_baseline_2,
 df_2b_proposal_3,
 df_2b_proposal_4,
 df_2b_proposal_5,
 df_2b_proposal_6,
 df_2b_proposal_7) = (
    pd.read_csv(dir_name + '/repairing_quality/' + approach + '.csv',
                sep=';',
                index_col=0,
                dtype=cols_dtype)
    for approach in ('baseline_2',
                     'proposal_3',
                     'proposal_4',
                     'proposal_5',
                     'proposal_6',
                     'proposal_7'))

# `df_baseline_1` (the one-shot frame bound by an earlier cell) goes first so that the views are
# in the same order as the captions in `approach_description_list`.
df_2b_view_list = [frame.style.format(cols_formats)
                   for frame in (df_baseline_1,
                                 df_2b_baseline_2,
                                 df_2b_proposal_3,
                                 df_2b_proposal_4,
                                 df_2b_proposal_5,
                                 df_2b_proposal_6,
                                 df_2b_proposal_7)]

# zip() silently truncates to the shorter input; fail loudly instead if the captions and the
# views ever get out of sync.
assert len(df_2b_view_list) == len(approach_description_list), \
    'expected exactly one description per displayed table'

for description, view in zip(approach_description_list, df_2b_view_list):
    print(description)
    display(view)

1) Baseline - One-Shot Execution


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,11265,6592,0.984,1.0,0.456,0.533,0.626,0.696,435,509,232,232,0,232


2) Baseline - Incremental, recomputing statistics from scratch


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,9126,8530,0.985,1.0,0.373,0.46,0.543,0.63,322,397,148,148,0,148


3) Proposal - Incremental, maintaining statistics incrementally


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,9126,8530,0.985,1.0,0.373,0.46,0.543,0.63,322,397,148,148,0,148


4) Proposal - Incremental, maintaining also the model parameters incrementally


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,9126,8530,0.985,1.0,0.373,0.46,0.543,0.63,322,397,148,148,0,148


5) Proposal - Incremental, skipping the training phase for some batches


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,9126,0,0.984,1.0,0.295,0.363,0.455,0.533,322,397,117,117,0,117


6) Proposal - Incremental, performing training without repeating cells already used for training


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,9126,5556,0.985,1.0,0.375,0.463,0.546,0.633,322,397,149,149,0,149


7) Proposal - Incremental, without trying to repair cells already analyzed in previous batches


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5311,3656,0.982,0.967,0.439,0.513,0.604,0.671,226,264,116,120,4,116
2,5292,3590,0.993,1.0,0.494,0.579,0.661,0.733,209,245,121,121,0,121


In [40]:
####################################################################################################
# REPAIRING QUALITY: Results for 10 batches.
####################################################################################################

dir_name = 'hospital/10_batches'

# One CSV per incremental approach lives under <dir_name>/repairing_quality/. They are all read
# with the same options (semicolon-separated, first column as index), so a single read_csv call
# is applied to each file name instead of repeating the call six times.
(df_10b_baseline_2,
 df_10b_proposal_3,
 df_10b_proposal_4,
 df_10b_proposal_5,
 df_10b_proposal_6,
 df_10b_proposal_7) = (
    pd.read_csv(dir_name + '/repairing_quality/' + approach + '.csv',
                sep=';',
                index_col=0,
                dtype=cols_dtype)
    for approach in ('baseline_2',
                     'proposal_3',
                     'proposal_4',
                     'proposal_5',
                     'proposal_6',
                     'proposal_7'))

# `df_baseline_1` (the one-shot frame bound by an earlier cell) goes first so that the views are
# in the same order as the captions in `approach_description_list`.
df_10b_view_list = [frame.style.format(cols_formats)
                    for frame in (df_baseline_1,
                                  df_10b_baseline_2,
                                  df_10b_proposal_3,
                                  df_10b_proposal_4,
                                  df_10b_proposal_5,
                                  df_10b_proposal_6,
                                  df_10b_proposal_7)]

# zip() silently truncates to the shorter input; fail loudly instead if the captions and the
# views ever get out of sync.
assert len(df_10b_view_list) == len(approach_description_list), \
    'expected exactly one description per displayed table'

for description, view in zip(approach_description_list, df_10b_view_list):
    print(description)
    display(view)

1) Baseline - One-Shot Execution


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,11265,6592,0.984,1.0,0.456,0.533,0.626,0.696,435,509,232,232,0,232


2) Baseline - Incremental, recomputing statistics from scratch


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,1475,1781,0.982,1.0,0.237,0.279,0.384,0.437,68,80,19,40,0,19
3,2571,2906,0.979,1.0,0.132,0.155,0.234,0.269,103,121,16,26,0,16
4,3198,3955,0.964,0.151,0.119,0.141,0.133,0.146,135,160,19,132,107,19
5,4306,4615,0.969,0.949,0.122,0.138,0.216,0.241,268,304,37,50,0,39
6,4914,5862,0.972,1.0,0.102,0.121,0.186,0.216,273,322,33,37,0,33
7,4901,6749,0.975,1.0,0.116,0.138,0.207,0.242,283,337,39,50,0,39
8,5369,8732,0.976,1.0,0.092,0.11,0.169,0.198,300,358,33,43,0,33
9,5461,10230,0.978,1.0,0.06,0.073,0.114,0.137,300,365,22,39,0,22
10,6086,11366,0.979,1.0,0.086,0.105,0.158,0.19,315,385,33,43,0,33


3) Proposal - Incremental, maintaining statistics incrementally


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,1475,1781,0.982,1.0,0.237,0.279,0.384,0.437,68,80,19,40,0,19
3,2571,2906,0.979,1.0,0.132,0.155,0.234,0.269,103,121,16,26,0,16
4,3198,3955,0.964,0.151,0.119,0.141,0.133,0.146,135,160,19,132,107,19
5,4306,4615,0.969,0.949,0.122,0.138,0.216,0.241,268,304,37,50,0,39
6,4914,5862,0.972,1.0,0.102,0.121,0.186,0.216,273,322,33,37,0,33
7,4901,6749,0.975,1.0,0.116,0.138,0.207,0.242,283,337,39,50,0,39
8,5369,8732,0.976,1.0,0.092,0.11,0.169,0.198,300,358,33,43,0,33
9,5461,10230,0.978,1.0,0.06,0.073,0.114,0.137,300,365,22,39,0,22
10,6086,11366,0.979,1.0,0.086,0.105,0.158,0.19,315,385,33,43,0,33


4) Proposal - Incremental, maintaining also the model parameters incrementally


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,1475,1781,0.982,1.0,0.237,0.279,0.384,0.437,68,80,19,40,0,19
3,2571,2906,0.979,1.0,0.132,0.155,0.234,0.269,103,121,16,26,0,16
4,3198,3955,0.963,0.15,0.119,0.141,0.132,0.145,135,160,19,133,108,19
5,4306,4615,0.968,0.949,0.121,0.138,0.215,0.24,269,305,37,50,0,39
6,4914,5862,0.972,1.0,0.102,0.12,0.185,0.215,274,323,33,37,0,33
7,4901,6749,0.975,1.0,0.115,0.137,0.207,0.241,284,338,39,50,0,39
8,5369,8732,0.976,1.0,0.092,0.11,0.168,0.198,301,359,33,43,0,33
9,5461,10230,0.978,1.0,0.06,0.073,0.113,0.136,301,366,22,39,0,22
10,6086,11366,0.979,1.0,0.085,0.104,0.158,0.189,316,386,33,43,0,33


5) Proposal - Incremental, skipping the training phase for some batches


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,1475,1781,0.982,1.0,0.237,0.279,0.384,0.437,68,80,19,40,0,19
3,2571,2906,0.979,1.0,0.132,0.155,0.234,0.269,103,121,16,26,0,16
4,3198,3955,0.963,0.15,0.119,0.141,0.132,0.145,135,160,19,133,108,19
5,4306,4615,0.968,0.949,0.121,0.138,0.215,0.24,269,305,37,50,0,39
6,4914,5862,0.972,1.0,0.102,0.12,0.185,0.215,274,323,33,37,0,33
7,4901,6749,0.975,1.0,0.115,0.137,0.207,0.241,284,338,39,50,0,39
8,5369,0,0.975,1.0,0.07,0.083,0.13,0.153,301,359,25,35,0,25
9,5961,0,0.977,1.0,0.053,0.065,0.102,0.122,309,374,20,37,0,20
10,6602,0,0.978,1.0,0.056,0.067,0.105,0.126,326,396,22,32,0,22


6) Proposal - Incremental, performing training without repeating cells already used for training


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,1475,999,0.981,1.0,0.212,0.25,0.351,0.4,68,80,17,38,0,17
3,2620,1222,0.982,1.0,0.26,0.305,0.413,0.467,105,123,32,42,0,32
4,2688,1594,0.963,0.131,0.13,0.157,0.131,0.143,121,146,19,151,126,19
5,3819,1210,0.967,0.646,0.136,0.153,0.225,0.248,274,309,42,76,14,51
6,3951,1496,0.97,0.929,0.077,0.113,0.143,0.201,231,336,26,32,0,28
7,4376,1332,0.973,1.0,0.087,0.125,0.159,0.222,248,358,31,42,0,31
8,4813,1625,0.974,0.93,0.103,0.147,0.186,0.254,272,387,40,53,2,41
9,5051,1432,0.976,1.0,0.062,0.11,0.116,0.198,218,389,24,41,0,24
10,5549,1426,0.978,1.0,0.076,0.134,0.142,0.236,232,407,31,41,0,31


7) Proposal - Incremental, without trying to repair cells already analyzed in previous batches


Unnamed: 0_level_0,dk_cells,training_cells,clean_dataset_ratio,precision,recall,repairing_recall,f1,repairing_f1,detected_errors,total_errors,correct_repairs,total_repairs,repairs_on_correct_cells,repairs_on_incorrect_cells
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,787,1036,0.978,1.0,0.26,0.277,0.413,0.433,47,50,13,33,0,13
2,744,868,0.991,1.0,0.302,0.382,0.464,0.553,34,43,13,27,0,13
3,924,833,0.989,1.0,0.367,0.423,0.537,0.595,52,60,22,32,0,22
4,799,1004,0.989,1.0,0.327,0.375,0.493,0.545,48,55,18,24,0,18
5,828,1147,0.986,0.606,0.357,0.444,0.449,0.513,45,56,20,33,13,20
6,931,910,0.987,1.0,0.218,0.286,0.358,0.444,42,55,12,12,0,12
7,819,1134,0.992,1.0,0.438,0.488,0.609,0.656,43,48,21,21,0,21
8,890,902,0.991,1.0,0.5,0.556,0.667,0.714,54,60,30,30,0,30
9,758,1076,0.995,1.0,0.55,0.667,0.71,0.8,33,40,22,22,0,22
10,659,911,0.994,1.0,0.548,0.639,0.708,0.78,36,42,23,23,0,23


In [59]:
####################################################################################################
# EXECUTION TIMES: Results for one-shot execution.
####################################################################################################

import statistics

# NOTE(review): this cell rebinds `cols_formats` and `df_baseline_1`, which the repairing-quality
# cells above also use; re-run those cells from the top before re-displaying quality tables.
# Every timed step is rendered as seconds with three decimals.
cols_formats = {step: '{:1.3f} secs'
                for step in ('load_data',
                             'load_dcs',
                             'detect_errors',
                             'setup_domain',
                             'featurize_data',
                             'setup_model',
                             'fit_model',
                             'infer_repairs',
                             'get_inferred_values',
                             'generate_repaired_dataset')}

dir_name = 'hospital/one_shot'
baseline_1_times_csv = dir_name + '/execution_times/baseline_1.csv'

df_baseline_1 = pd.read_csv(baseline_1_times_csv, sep=';', index_col=0, dtype=cols_dtype)

# Average every column except the first one; each mean is wrapped in a one-element list so the
# resulting frame has a single row.
dict_baseline_1_mean = {col: [statistics.mean(df_baseline_1[col])]
                        for col in df_baseline_1.columns[1:]}
df_baseline_1_mean = pd.DataFrame.from_dict(dict_baseline_1_mean)

df_baseline_1_view = df_baseline_1_mean.style.format(cols_formats)

# Caption first, then the styled one-row table as the cell's displayed value.
print(approach_description_list[0])
df_baseline_1_view

1) Baseline - One-Shot Execution


Unnamed: 0,load_data,load_dcs,detect_errors,setup_domain,featurize_data,setup_model,fit_model,infer_repairs,get_inferred_values,generate_repaired_dataset
0,0.155 secs,0.002 secs,0.883 secs,11.171 secs,32.551 secs,0.001 secs,47.246 secs,4.023 secs,0.079 secs,0.191 secs


In [69]:
####################################################################################################
# EXECUTION TIMES: Results for 10 batches.
####################################################################################################

dir_name = 'hospital/10_batches'

# Load the per-approach timing CSVs from <dir_name>/execution_times/, all with the same options
# (semicolon-separated, first column as index).
(df_10b_baseline_2,
 df_10b_proposal_3,
 df_10b_proposal_4,
 df_10b_proposal_5,
 df_10b_proposal_6,
 df_10b_proposal_7) = (
    pd.read_csv(dir_name + '/execution_times/' + approach + '.csv',
                sep=';',
                index_col=0,
                dtype=cols_dtype)
    for approach in ('baseline_2',
                     'proposal_3',
                     'proposal_4',
                     'proposal_5',
                     'proposal_6',
                     'proposal_7'))

# Only the unformatted proposal_5 timings are displayed by this cell.
df_10b_proposal_5

# Styled per-approach views, currently disabled.
# NOTE(review): the list below has six entries and no one-shot view, while
# `approach_description_list` has seven captions starting with the one-shot baseline, so the zip
# as written would print caption 1) above baseline_2 -- fix the pairing before re-enabling.
# df_10b_view_list = [df_10b_baseline_2.style.format(cols_formats),
#                     df_10b_proposal_3.style.format(cols_formats),
#                     df_10b_proposal_4.style.format(cols_formats),
#                     df_10b_proposal_5.style.format(cols_formats),
#                     df_10b_proposal_6.style.format(cols_formats),
#                     df_10b_proposal_7.style.format(cols_formats)]

# for description, df in zip(approach_description_list, df_10b_view_list):
#     print(description)
#     display(df)

Unnamed: 0_level_0,batch,load_data,load_dcs,detect_errors,setup_domain,featurize_data,setup_model,fit_model,infer_repairs,get_inferred_values,generate_repaired_dataset
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1,0.060729,0.001686,0.132806,2.367155,1.879604,0.000569,5.204957,0.249075,0.015039,0.529054
1,2,0.058506,0.001696,0.195789,3.018211,3.521861,0.001609,10.092675,0.320564,0.016959,1.023212
1,3,0.058431,0.001676,0.270446,4.151108,6.174028,0.001461,16.037159,0.657651,0.020104,1.413634
1,4,0.058459,0.001685,0.314014,5.01672,9.07099,0.001508,24.06301,0.797037,0.02173,1.880594
1,5,0.058816,0.00166,0.392353,6.200876,12.920699,0.001486,28.788273,1.21951,0.025428,2.224663
1,6,0.058797,0.001696,0.433338,6.628382,12.886218,0.001503,36.456328,1.344365,0.028146,2.5803
1,7,0.05879,0.001693,0.437206,7.386511,16.789925,0.001502,46.120841,1.553133,0.027231,2.913703
1,8,0.058971,0.001695,0.468757,8.764438,19.510513,0.001541,,1.71156,0.028404,3.371702
1,9,0.059066,0.001682,0.513111,9.932729,24.959993,0.001496,,2.043537,0.030042,3.687479
1,10,0.059089,0.001686,0.637337,10.748959,28.002351,0.001485,,2.303251,0.031952,4.003993
