In [1]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import copy

# Stop the execution of a cell (call it with "raise StopExecution")
class StopExecution(Exception):
    """Exception raised to abort the current cell (``raise StopExecution``).

    The traceback hook below returns nothing; presumably the notebook
    front end uses it so that no traceback is shown -- confirm against the
    IPython version in use.
    """

    def _render_traceback_(self):
        # Deliberately produce no traceback content.
        return None

# General Methods

In [2]:
def getPrecision(tp, fp):
    """Return precision ``tp / (tp + fp)``.

    When both counts are zero (nothing was reported at all) the precision is
    defined as 1.0 instead of dividing by zero.
    """
    nothing_reported = not (tp != 0 or fp != 0)
    return 1.0 if nothing_reported else tp / (tp + fp)


def getRecall(tp, fn):
    """Return recall ``tp / (tp + fn)``.

    When both counts are zero there is nothing to recall, so the result is
    ``np.nan`` rather than a number.
    """
    nothing_expected = not (tp != 0 or fn != 0)
    return np.nan if nothing_expected else tp / (tp + fn)


def getF1(tp, fp, fn):
    """Return the F1 score ``tp / (tp + 0.5 * (fp + fn))``.

    Mirrors getRecall: when ``tp`` and ``fn`` are both zero the recall is
    undefined, so the F1 score is reported as ``np.nan`` as well.
    """
    recall_undefined = not (tp != 0 or fn != 0)
    if recall_undefined:
        return np.nan
    errors = fp + fn
    return tp / (tp + 0.5 * errors)


def printResults(tp, fp, fn):
    """Print the raw counts followed by precision and recall (3 decimals)."""
    for label, count in (('TP = ', tp), ('FP = ', fp), ('FN = ', fn)):
        print(label + str(count))
    precision = getPrecision(tp, fp)
    print('Precision = {:.3f}'.format(precision))
    recall = getRecall(tp, fn)
    print('Recall = {:.3f}'.format(recall))

# Accuracy on Type of Instance (i.e., Method Name)

In [3]:
def getMethods(string_methods):
    """Split the textual list stored in the CSV into individual entries.

    The literal ``'[]'`` yields an empty list. Otherwise the first and last
    characters are dropped and the remainder is split on ``'), '``; the
    closing parenthesis consumed by the split is put back on every piece
    except the last one, which still carries its own.
    """
    if string_methods == '[]':
        return []
    pieces = string_methods[1:-1].split('), ')
    # split() always returns at least one piece, so pieces[-1:] is never empty.
    return [piece + ')' for piece in pieces[:-1]] + pieces[-1:]
                
                
def getGroundTruth(df, oracle='minClientsMaxDuration', minVotes=0):
    """Return the list of methods treated as ground truth for ``df``.

    Parameters
    ----------
    df : DataFrame
        Rows for one configuration; the code reads its 'load', 'duration'
        and 'methodsAP' columns.
    oracle : str
        'minClientsMaxDuration' -- use the methods reported by the run with
        load == LOAD_TRUTH and duration == DURATION_TRUTH.
        'vote' -- use every method reported by at least ``minVotes`` of the
        LOADS x DURATIONS runs.
    minVotes : int
        Vote threshold; must be positive when ``oracle == 'vote'``.

    Raises
    ------
    ValueError
        If ``oracle`` is not one of the two supported names. (Previously the
        function fell through and returned None, which only failed later
        with an unrelated-looking error.)
    StopExecution
        If ``oracle == 'vote'`` and ``minVotes`` is not positive.
    """
    if oracle == 'minClientsMaxDuration':
        reference = df[(df['load'] == LOAD_TRUTH) & (df['duration'] == DURATION_TRUTH)]
        return getMethods(reference['methodsAP'].iloc[0])

    if oracle != 'vote':
        raise ValueError(
            "Unknown oracle {!r}; expected 'minClientsMaxDuration' or 'vote'".format(oracle)
        )

    if not minVotes > 0:
        print('Provide the minimum number of votes (minVotes) to consider a method a true positive')
        raise StopExecution

    # Count, for every method, in how many (load, duration) runs it was reported.
    polls = {}
    for load in LOADS:
        for duration in DURATIONS:
            run = df[(df['load'] == load) & (df['duration'] == duration)]
            for method in getMethods(run['methodsAP'].iloc[0]):
                polls[method] = polls.get(method, 0) + 1

    # Dict order is insertion order, so methods come out in first-seen order.
    return [method for method, votes in polls.items() if votes >= minVotes]


def getTruePositives(df, gt):
    """Count ground-truth methods that also appear in the first 'methodsAP' entry of ``df``."""
    if len(gt) == 0:
        # An empty ground truth cannot produce any true positive.
        return 0
    reported = getMethods(df['methodsAP'].iloc[0])
    return sum(1 for method in gt if method in reported)


def getFalsePositives(df, gt):
    """Count reported methods (first 'methodsAP' entry of ``df``) that are not in ``gt``.

    Computed as the number of reported methods minus the number of
    ground-truth methods found among them.
    """
    reported = getMethods(df['methodsAP'].iloc[0])
    # Ground-truth methods that were reported, i.e. the true positives.
    matched = sum(1 for method in gt if method in reported)
    return len(reported) - matched


def getFalseNegatives(df, gt):
    """Count ground-truth methods missing from the first 'methodsAP' entry of ``df``."""
    if len(gt) == 0:
        # With no ground truth there is nothing that could have been missed.
        return 0
    reported = getMethods(df['methodsAP'].iloc[0])
    return sum(1 for method in gt if method not in reported)

In [4]:
# Load the results table. Later cells filter it on columns such as 'type',
# 'system', 'load' and 'duration' and read the reported methods from 'methodsAP'.
dfOrig = pd.read_csv('./data/accuracy.csv')

In [5]:
# NOTE: this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# General parameters
#SYSTEMS = ['petclinic', 'broadleaf', 'webgoat', 'trainticket', 'openmrs']
#SYSTEM_NAMES = ['PetClinic', 'Broadleaf', 'WebGoat', 'TrainTicket', 'OpenMRS']
SYSTEMS = ['webgoat', 'trainticket', 'openmrs']
SYSTEM_NAMES = ['WebGoat', 'TrainTicket', 'OpenMRS']
LOADS = [25, 50, 75, 100]
DURATIONS = [3, 6, 12]
LOAD_TRUTH = 25
DURATION_TRUTH = 12
MIN_VOTES = 6

# Outputs
# First-column labels: one \multirow cell per system followed by blanks so the
# list has one entry per (system, load, duration) table row.
# Backslashes are written as '\\' so the literals do not depend on Python
# passing through the unrecognised escape '\m'.
SYSTEMS_list = []
for system in SYSTEM_NAMES:
    numSubRows = len(LOADS) * len(DURATIONS)
    SYSTEMS_list.append('\\multirow{' + str(numSubRows) + '}{*}{\\rotatebox[origin=c]{90}{\\textbf{' + str(system) + '}}}')
    SYSTEMS_list.extend([''] * (numSubRows - 1))

# Second-column labels: one \multirow cell per load spanning its durations,
# repeated for every system.
LOADS_list = []
for load in LOADS:
    numSubRows = len(DURATIONS)
    LOADS_list.append('\\multirow{' + str(numSubRows) + '}{*}{' + str(load) + '}')
    LOADS_list.extend([''] * (numSubRows - 1))
LOADS_list = LOADS_list * len(SYSTEMS)

# Third column: the duration of each row, in the same row order.
DURATIONS_list = DURATIONS * len(LOADS) * len(SYSTEMS)

# Per-type accumulators filled by the scoring cells below, one value per
# (system, load, duration) run: precision (_P), recall (_R) and F1 (_F).
CTH_list_P = []
EP_list_P = []
WCS_list_P = []
BLOB_list_P = []
TOB_list_P = []
EST_list_P = []
EDA_list_P = []
CTH_list_R = []
EP_list_R = []
WCS_list_R = []
BLOB_list_R = []
TOB_list_R = []
EST_list_R = []
EDA_list_R = []
CTH_list_F = []
EP_list_F = []
WCS_list_F = []
BLOB_list_F = []
TOB_list_F = []
EST_list_F = []
EDA_list_F = []

In [6]:
AP = 'CTH'

COUNT_TH = 5
CPU_TH = 10
OPTION = 'average'

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
CTH_list_P.clear()
CTH_list_R.clear()
CTH_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['countTh']==COUNT_TH) &
               (dfOrig['cpuTh']==CPU_TH) &
               (dfOrig['option']==OPTION)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            CTH_list_P.append(getPrecision(tp, fp))
            CTH_list_R.append(getRecall(tp, fn))
            CTH_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
25 - 6
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
25 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
75 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 6
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 12
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 1
FP = 0
FN = 0
Precision = 

In [7]:
AP = 'EP'

TIME_TH = 5

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
EP_list_P.clear()
EP_list_R.clear()
EP_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['methodTimeTh']==TIME_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            EP_list_P.append(getPrecision(tp, fp))
            EP_list_R.append(getRecall(tp, fn))
            EP_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 6
FP = 0
FN = 1
Precision = 1.000
Recall = 0.857
25 - 6
TP = 6
FP = 0
FN = 1
Precision = 1.000
Recall = 0.857
25 - 12
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 12
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 6
FP = 0
FN = 1
Precision = 1.000
Recall = 0.857
75 - 6
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 7
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 5
FP = 1
FN = 0
Precision = 0.833
Recall = 1.000
25 - 6
TP = 4
FP = 0
FN = 1
Precision = 1.000
Recall = 0.800
25 - 12
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 4
FP = 0
FN = 1
Precision = 

In [8]:
AP = 'WCS'

MEM_TH = 5

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
WCS_list_P.clear()
WCS_list_R.clear()
WCS_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['memoryUsageTh']==MEM_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            WCS_list_P.append(getPrecision(tp, fp))
            WCS_list_R.append(getRecall(tp, fn))
            WCS_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 0
FP = 0
FN = 2
Precision = 1.000
Recall = 0.000
25 - 6
TP = 0
FP = 0
FN = 2
Precision = 1.000
Recall = 0.000
25 - 12
TP = 0
FP = 0
FN = 2
Precision = 1.000
Recall = 0.000
50 - 3
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 1
FP = 0
FN = 1
Precision = 1.000
Recall = 0.500
50 - 12
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 1
FP = 0
FN = 1
Precision = 1.000
Recall = 0.500
75 - 6
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 2
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 6
TP = 1
FP = 1
FN = 0
Precision = 0.500
Recall = 1.000
25 - 12
TP = 1
FP = 1
FN = 0
Precision = 0.500
Recall = 1.000
50 - 3
TP = 0
FP = 0
FN = 1
Precision = 

In [9]:
AP = 'BLOB'

MSG_TH = 5
CPU_TH = 10
HEAP_TH = 10

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
BLOB_list_P.clear()
BLOB_list_R.clear()
BLOB_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['numMsgs']==MSG_TH) &
               (dfOrig['cpuTh']==CPU_TH) &
               (dfOrig['heapTh']==HEAP_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            BLOB_list_P.append(getPrecision(tp, fp))
            BLOB_list_R.append(getRecall(tp, fn))
            BLOB_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 4
FP = 1
FN = 1
Precision = 0.800
Recall = 0.800
25 - 6
TP = 4
FP = 1
FN = 1
Precision = 0.800
Recall = 0.800
25 - 12
TP = 5
FP = 1
FN = 0
Precision = 0.833
Recall = 1.000
50 - 3
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 12
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 4
FP = 1
FN = 1
Precision = 0.800
Recall = 0.800
75 - 6
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 5
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 6
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 12
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 1
FP = 0
FN = 0
Precision = 

In [10]:
AP = 'TOB'

TIME_TH = 5

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
TOB_list_P.clear()
TOB_list_R.clear()
TOB_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['methodTimeTh']==TIME_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            TOB_list_P.append(getPrecision(tp, fp))
            TOB_list_R.append(getRecall(tp, fn))
            TOB_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 6
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 12
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
50 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000


In [11]:
AP = 'EST'

MSGS_TH = 5

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
EST_list_P.clear()
EST_list_R.clear()
EST_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['numMsgs']==MSGS_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            EST_list_P.append(getPrecision(tp, fp))
            EST_list_R.append(getRecall(tp, fn))
            EST_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 6
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 12
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
50 - 3
TP = 0
FP = 1
FN = 0
Precision = 0.000
Recall = nan
50 - 6
TP = 0
FP = 1
FN = 0
Precision = 0.000
Recall = nan
50 - 12
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
75 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
75 - 6
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
75 - 12
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
100 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
100 - 6
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
100 - 12
TP = 0
FP = 1
FN = 0
Precision = 0.000
Recall = nan

### trainticket ###
25 - 3
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 6
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
25 - 12
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 1
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 

In [12]:
AP = 'EDA'

OBJS_TH = 5
HEAP_TH = 10

# Start from empty accumulators so that re-running this cell does not append a
# second copy of the scores; the per-system averaging later in the notebook
# slices these lists in fixed STEP-sized chunks and would read the wrong rows.
EDA_list_P.clear()
EDA_list_R.clear()
EDA_list_F.clear()

for SYSTEM in SYSTEMS:
    print('### ' + SYSTEM + ' ###')
    # All runs of this system for the selected type and threshold configuration.
    dfTmp = dfOrig[(dfOrig['type']==AP) &
               (dfOrig['system']==SYSTEM) &
               (dfOrig['numGCedObjsTh']==OBJS_TH) &
               (dfOrig['heapTh']==HEAP_TH)
              ]
    # Ground truth is computed once per system by voting across all runs.
    gt_methods = getGroundTruth(dfTmp, oracle='vote', minVotes=MIN_VOTES)
    for LOAD in LOADS:
        for DURATION in DURATIONS:
            print(str(LOAD) + ' - ' + str(DURATION))
            dfTmp2 = dfTmp[(dfTmp['load']==LOAD) &
                          (dfTmp['duration']==DURATION)
                         ]
            tp = getTruePositives(dfTmp2, gt_methods)
            fp = getFalsePositives(dfTmp2, gt_methods)
            fn = getFalseNegatives(dfTmp2, gt_methods)
            printResults(tp, fp, fn)
            EDA_list_P.append(getPrecision(tp, fp))
            EDA_list_R.append(getRecall(tp, fn))
            EDA_list_F.append(getF1(tp, fp, fn))
    print()

### webgoat ###
25 - 3
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
25 - 6
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
25 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
50 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 3
TP = 3
FP = 0
FN = 1
Precision = 1.000
Recall = 0.750
75 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
75 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 3
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 6
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000
100 - 12
TP = 4
FP = 0
FN = 0
Precision = 1.000
Recall = 1.000

### trainticket ###
25 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 6
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
25 - 12
TP = 0
FP = 0
FN = 0
Precision = 1.000
Recall = nan
50 - 3
TP = 0
FP = 0
FN = 0
Precision = 1.000


In [13]:
#with open('./results/precision_table.tex', 'w') as f:
#    f.write('\\begin{table}[t] \n')
#    f.write('\centering \n')
#    f.write('\caption{Precision} \n')
#    f.write('\label{tab:precision} \n')
#    f.write('\\resizebox{\linewidth}{!}{ \n')
#    f.write('\\begin{tabular}{rrrrrrrrrr} \n')
#    f.write('\\toprule \n')
#    f.write('&  & \emph{duration} &  &  &  &  &  &  & \\\\ \n')
#    f.write('& \emph{\#clients} & \emph{(min)} & \emph{CTH} & \emph{EP} & \emph{WCS} & \emph{Blob} & \emph{ToB} & \emph{EST} & \emph{EDA} \\\\ \n')
#    f.write('\midrule \n')
#    idx = 0
#    for s, l, d, cth, ep, wcs, blob, tob, est, eda in zip(SYSTEMS_list, LOADS_list, DURATIONS_list, CTH_list_P, EP_list_P, WCS_list_P, BLOB_list_P, TOB_list_P, EST_list_P, EDA_list_P):
#        f.write(str(s) + ' & ' + str(l) + ' & ' + str(d) + ' & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\ \n'.format(cth, ep, wcs, blob, tob, est, eda))
#        idx += 1
#        if idx % (len(LOADS) * len(DURATIONS)) == 0:
#            f.write('\midrule \n')
#            idx = 0
#        elif idx % len(DURATIONS) == 0:
#            f.write('\cline{2-10} \n')
#    f.write('\end{tabular}} \n')
#    f.write('\end{table} \n')

In [14]:
#with open('./results/recall_table.tex', 'w') as f:
#    f.write('\\begin{table}[t] \n')
#    f.write('\centering \n')
#    f.write('\caption{Recall} \n')
#    f.write('\label{tab:recall} \n')
#    f.write('\\resizebox{\linewidth}{!}{ \n')
#    f.write('\\begin{tabular}{rrrrrrrrrr} \n')
#    f.write('\\toprule \n')
#    f.write('&  & \emph{duration} &  &  &  &  &  &  & \\\\ \n')
#    f.write('& \emph{\#clients} & \emph{(min)} & \emph{CTH} & \emph{EP} & \emph{WCS} & \emph{Blob} & \emph{ToB} & \emph{EST} & \emph{EDA} \\\\ \n')
#    f.write('\midrule \n')
#    idx = 0
#    for s, l, d, cth, ep, wcs, blob, tob, est, eda in zip(SYSTEMS_list, LOADS_list, DURATIONS_list, CTH_list_R, EP_list_R, WCS_list_R, BLOB_list_R, TOB_list_R, EST_list_R, EDA_list_R):
#        f.write(str(s) + ' & ' + str(l) + ' & ' + str(d) + ' & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\ \n'.format(cth, ep, wcs, blob, tob, est, eda))
#        idx += 1
#        if idx % (len(LOADS) * len(DURATIONS)) == 0:
#            f.write('\midrule \n')
#            idx = 0
#        elif idx % len(DURATIONS) == 0:
#            f.write('\cline{2-10} \n')
#    f.write('\end{tabular}} \n')
#    f.write('\end{table} \n')

In [15]:
#with open('./results/f1_table.tex', 'w') as f:
#    f.write('\\begin{table}[t] \n')
#    f.write('\centering \n')
#    f.write('\caption{F1-score} \n')
#    f.write('\label{tab:f1} \n')
#    f.write('\\resizebox{\linewidth}{!}{ \n')
#    f.write('\\begin{tabular}{rrrrrrrrrr} \n')
#    f.write('\\toprule \n')
#    f.write('&  & \emph{duration} &  &  &  &  &  &  & \\\\ \n')
#    f.write('& \emph{\#clients} & \emph{(min)} & \emph{CTH} & \emph{EP} & \emph{WCS} & \emph{Blob} & \emph{ToB} & \emph{EST} & \emph{EDA} \\\\ \n')
#    f.write('\midrule \n')
#    idx = 0
#    for s, l, d, cth, ep, wcs, blob, tob, est, eda in zip(SYSTEMS_list, LOADS_list, DURATIONS_list, CTH_list_F, EP_list_F, WCS_list_F, BLOB_list_F, TOB_list_F, EST_list_F, EDA_list_F):
#        f.write(str(s) + ' & ' + str(l) + ' & ' + str(d) + ' & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\ \n'.format(cth, ep, wcs, blob, tob, est, eda))
#        idx += 1
#        if idx % (len(LOADS) * len(DURATIONS)) == 0:
#            f.write('\midrule \n')
#            idx = 0
#        elif idx % len(DURATIONS) == 0:
#            f.write('\cline{2-10} \n')
#    f.write('\end{tabular}} \n')
#    f.write('\end{table} \n')

In [16]:
# NOTE: this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# General parameters
#SYSTEMS = ['petclinic', 'broadleaf', 'webgoat', 'trainticket', 'openmrs']
#SYSTEM_NAMES = ['PetClinic', 'Broadleaf', 'WebGoat', 'TrainTicket', 'OpenMRS']
SYSTEMS = ['webgoat', 'trainticket', 'openmrs']
SYSTEM_NAMES = ['WebGoat', 'TrainTicket', 'OpenMRS']
MEASURES = ['P', 'R', 'F1']

# Outputs
# Row labels for the summary table: one \multirow cell per system followed by
# blanks, so there is one entry per (system, measure) row. This rebinds
# SYSTEMS_list, replacing the per-run labels built earlier.
# Backslashes are written as '\\' so the literal does not depend on Python
# passing through the unrecognised escape '\m'.
SYSTEMS_list = []
for system in SYSTEM_NAMES:
    numSubRows = len(MEASURES)
    SYSTEMS_list.append('\\multirow{' + str(numSubRows) + '}{*}{\\rotatebox[origin=c]{90}{\\textbf{' + str(system) + '}}}')
    SYSTEMS_list.extend([''] * (numSubRows - 1))
MEASURES_list = MEASURES * len(SYSTEMS)

In [17]:
# Number of runs recorded per system in each *_list_* accumulator.
STEP = len(LOADS) * len(DURATIONS)


def _per_system_scores(precisions, recalls):
    """Average per-run scores system by system and derive F1 from the averages.

    Each input list is read in consecutive STEP-sized chunks, one chunk per
    entry of SYSTEMS. Returns three lists (mean precision, mean recall, F1),
    where F1 is the harmonic mean of the two averaged values.
    """
    n_systems = len(SYSTEMS)
    mean_p = [np.mean(precisions[k*STEP:(k+1)*STEP]) for k in range(n_systems)]
    mean_r = [np.mean(recalls[k*STEP:(k+1)*STEP]) for k in range(n_systems)]
    f1 = [2 * p * r / (p + r) for p, r in zip(mean_p, mean_r)]
    return mean_p, mean_r, f1


CTH_sys_P, CTH_sys_R, CTH_sys_F = _per_system_scores(CTH_list_P, CTH_list_R)

EP_sys_P, EP_sys_R, EP_sys_F = _per_system_scores(EP_list_P, EP_list_R)

WCS_sys_P, WCS_sys_R, WCS_sys_F = _per_system_scores(WCS_list_P, WCS_list_R)

BLOB_sys_P, BLOB_sys_R, BLOB_sys_F = _per_system_scores(BLOB_list_P, BLOB_list_R)

TOB_sys_P, TOB_sys_R, TOB_sys_F = _per_system_scores(TOB_list_P, TOB_list_R)

EST_sys_P, EST_sys_R, EST_sys_F = _per_system_scores(EST_list_P, EST_list_R)

EDA_sys_P, EDA_sys_R, EDA_sys_F = _per_system_scores(EDA_list_P, EDA_list_R)

In [18]:
# Write the per-system and overall summary as a LaTeX table.
# Every LaTeX backslash is written as '\\' so no literal depends on Python
# passing through an unrecognised escape such as '\c', '\l', '\e' or '\m'.

# Shared tail of every data row: seven two-decimal columns, a LaTeX row break
# and the trailing ' \n' used throughout the table.
ROW_FMT = ' & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\ \n'

with open('./results/acc_table.tex', 'w') as f:
    f.write('\\begin{table}[t] \n')
    f.write('\\centering \n')
    f.write('\\caption{Accuracy} \n')
    f.write('\\label{tab:acc} \n')
    f.write('\\resizebox{\\linewidth}{!}{ \n')
    f.write('\\begin{tabular}{rr|rrrrrrr} \n')
    f.write('\\toprule \n')
    f.write('&  &  &  &  &  &  &  & \\\\ \n')
    f.write('&  & \\emph{CTH} & \\emph{EP} & \\emph{WCS} & \\emph{Blob} & \\emph{ToB} & \\emph{EST} & \\emph{EDA} \\\\ \n')
    f.write('\\midrule \n')

    # "<system label> & <measure>" prefix for each of the per-system rows.
    HEADERS_list = []
    for s, meas in zip(SYSTEMS_list, MEASURES_list):
        HEADERS_list.append(str(s) + ' & ' + str(meas))

    # idx walks the row headers (three per system: P, R, F1);
    # idx2 is the system index into the *_sys_* lists.
    idx = 0
    idx2 = 0
    while idx < len(HEADERS_list):
        # Only the numeric tail is passed through format(); the header text
        # contains literal braces and must not be treated as a format string.
        f.write(HEADERS_list[idx] + ROW_FMT.format(CTH_sys_P[idx2], EP_sys_P[idx2], WCS_sys_P[idx2], BLOB_sys_P[idx2], TOB_sys_P[idx2], EST_sys_P[idx2], EDA_sys_P[idx2]))
        idx += 1
        f.write(HEADERS_list[idx] + ROW_FMT.format(CTH_sys_R[idx2], EP_sys_R[idx2], WCS_sys_R[idx2], BLOB_sys_R[idx2], TOB_sys_R[idx2], EST_sys_R[idx2], EDA_sys_R[idx2]))
        idx += 1
        f.write(HEADERS_list[idx] + ROW_FMT.format(CTH_sys_F[idx2], EP_sys_F[idx2], WCS_sys_F[idx2], BLOB_sys_F[idx2], TOB_sys_F[idx2], EST_sys_F[idx2], EDA_sys_F[idx2]))
        idx += 1
        idx2 += 1
        f.write('\\midrule \n')
    f.write('\\midrule \n')

    # Overall rows: NaN-ignoring mean of the per-system values.
    f.write('\\multirow{3}{*}{\\rotatebox[origin=c]{90}{\\textbf{Overall}}} & P' + ROW_FMT.format(np.nanmean(CTH_sys_P), np.nanmean(EP_sys_P), np.nanmean(WCS_sys_P), np.nanmean(BLOB_sys_P), np.nanmean(TOB_sys_P), np.nanmean(EST_sys_P), np.nanmean(EDA_sys_P)))
    f.write('& R' + ROW_FMT.format(np.nanmean(CTH_sys_R), np.nanmean(EP_sys_R), np.nanmean(WCS_sys_R), np.nanmean(BLOB_sys_R), np.nanmean(TOB_sys_R), np.nanmean(EST_sys_R), np.nanmean(EDA_sys_R)))
    f.write('& F1' + ROW_FMT.format(np.nanmean(CTH_sys_F), np.nanmean(EP_sys_F), np.nanmean(WCS_sys_F), np.nanmean(BLOB_sys_F), np.nanmean(TOB_sys_F), np.nanmean(EST_sys_F), np.nanmean(EDA_sys_F)))
    f.write('\\midrule \n')
    f.write('\\end{tabular}} \n')
    f.write('\\end{table} \n')