# Output Processor

In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
## Configurable
# Number of largest and smallest runtimes to discard before averaging;
# this value is handed to dropMinAndMax as its first argument.
n_drop = 2

In [None]:
# Load the experiment configuration
with open('config.json', 'r') as read_file:
    conf = json.load(read_file)

data = conf['data']
queries = conf['queries']
runs = conf['runs']

# One output directory per (task, sub-task, query) combination:
# ./src/output/<task>/<subT>/<query>/
path = './src/output/'
queryDirs = [
    path + task + '/' + subT + '/' + q + '/'
    for task in data
    for subT in data[task]
    for q in queries
]

# Display the data section of the configuration as the cell output
conf['data']

In [None]:
## General functions

# Dropping n * (min and max) from runtimes
def dropMinAndMax(n_drop, timeList):
    """Remove up to `n_drop` largest and `n_drop` smallest values from a Series.

    Each round removes one maximum and one minimum. A round is only carried
    out while the Series holds more than three values, so short inputs are
    returned with fewer (or no) values removed.

    Parameters
    ----------
    n_drop : int
        Number of trimming rounds. 0 leaves the data untouched.
    timeList : pandas.Series
        Values to trim (the notebook passes runtimes / averaged runtimes).

    Returns
    -------
    pandas.Series
        The remaining values. The index is not preserved once at least one
        round has been carried out.
    """
    for _ in range(n_drop):
        if timeList.size <= 3:
            # Too few values left to trim any further.
            break
        # argmax/argmin return positions, so the index is reset first to make
        # label-based drop() remove exactly that position.
        timeList = timeList.reset_index(drop=True)
        timeList = timeList.drop(timeList.values.argmax())
        timeList = timeList.reset_index(drop=True)
        timeList = timeList.drop(timeList.values.argmin())
    # Returned after the loop so that all rounds run and n_drop == 0 still
    # yields a Series instead of None.
    return timeList

# Function for saving figures
def saveFig(label):
    """Save the current matplotlib figure as './results/<label>_plot.png'.

    The results directory is created first when it does not exist yet.

    Parameters
    ----------
    label : str
        Prefix of the file name; '_plot.png' is appended.
    """
    result_path = './results/'
    # Create the directory the figure is actually written to; exist_ok makes
    # repeated calls harmless.
    os.makedirs(result_path, exist_ok=True)
    plt.savefig(result_path + label + '_plot.png')
    

In [None]:
# Creating general dataframe with all results and experimental details

# Fields taken from each run's output file / from the task configuration
output_fields = ['task', 'subT', 'queryNr', 'run', 'time', 'result']
config_fields = ['pods', 'posts', 'auths', 'acEnforce']

dfResult = pd.DataFrame(columns = output_fields + config_fields)
c = 0              # runs read so far; also the row label of the next run
error_count = 0    # expected run files that were not found
error_list = []    # paths of the run files that were not found

# Reading in of output files: run1.json ... run<runs>.json in every directory
for directory in queryDirs:
    for run_number in range(1, runs + 1):
        path = directory + 'run' + str(run_number) + '.json'

        if not os.path.isfile(path):
            error_count += 1
            error_list.append(path)
            continue

        with open(path, 'r') as f:
            output = json.load(f)

        # Retrieving result data from output file
        for field in output_fields:
            dfResult.at[c, field] = output[field]
        # Retrieving experiment data from configuration file
        for field in config_fields:
            dfResult.at[c, field] = data[output['task']][output['subT']][field]
        c += 1

# Converting milliseconds to seconds
dfResult['time'] = dfResult['time'].apply(lambda ms: ms/1000)

# Calculation of aQET and aQMET
for task in data:
    for subT in data[task]:
        in_subT = dfResult.subT == subT

        for q in queries:
            # aQET: trimmed mean of the run times of one query in this
            # sub-task, written to every row of that query
            query_rows = dfResult[in_subT & (dfResult.queryNr == q)]
            trimmed = dropMinAndMax(n_drop, query_rows.time)
            dfResult.loc[query_rows.index, 'aQET'] = round(trimmed.mean(), 3)

        # aQMET: trimmed mean of the aQET values of all rows in this sub-task
        # NOTE(review): rows are selected by subT only (not by task) and every
        # query contributes one aQET value per run row - confirm this
        # weighting is intended.
        subT_rows = dfResult[in_subT]
        trimmed = dropMinAndMax(n_drop, subT_rows.aQET)
        dfResult.loc[subT_rows.index, 'aQMET'] = round(trimmed.mean(), 3)




# Saving Dataframe including all data
# to_csv does not create missing parent directories, so make sure the
# results folder exists before writing into it.
os.makedirs('./results/', exist_ok=True)
dfResult.to_csv('./results/results_table.csv')

# Print the runs that failed and the total number of runs
total_runs = error_count + c 
print('Number or failed runs: ', error_count, ' of ', total_runs)
print('Paths with error: ')
for error in error_list:
    print('  -', error)

# Display the first rows as the cell output
dfResult.head(5)


## Personalised plots

In [None]:
# Optional: uncomment to work on a previously saved results table instead of
# the one computed above.
#dfResult = pd.read_csv('./other/results/results5/results_table.csv')
# Independent copy of the full table, used for the per-query (aQET) plots.
dfQueries = dfResult.copy()

In [None]:
# aQMET dataframe: one row per distinct combination of the columns below
# (the per-query columns 'aQET' and 'queryNr' are left out on purpose)
aqmet_columns = ['task', 'subT', 'aQMET', 'pods', 'posts', 'auths', 'acEnforce']
dfResult = dfResult[aqmet_columns].drop_duplicates().reset_index(drop = True)
dfResult

In [None]:
# aQET dataframe (used for the per-query graphs)
aqet_columns = ['task', 'subT', 'pods', 'posts', 'auths', 'acEnforce', 'aQET', 'queryNr']
dfQueries = dfQueries[aqet_columns]

# Keep only query q1 (for all tasks), one row per distinct combination
is_q1 = dfQueries.queryNr == 'q1'
dfQueries = dfQueries[is_q1].drop_duplicates().reset_index(drop = True)
dfQueries



In [None]:
from matplotlib.ticker import ScalarFormatter

def drawComparisonPlot(t1, t2, df, xaxis, timeMeasure, xlabel):
    """Plot `timeMeasure` of task `t1` and task `t2` in one figure and save it.

    `t1` is drawn as 'Access Enforced', `t2` as 'Access Not Enforced'.
    For t1 == 't5' a linear x axis with the fixed positions 33, 66, 100 is
    used; every other task pair gets a base-2 logarithmic x axis with the
    fixed positions 2, 4, 8, 16, 32. The x positions are hard-coded, not read
    from `df`.

    Parameters
    ----------
    t1, t2 : str
        Values of the 'task' column selecting the two series.
    df : pandas.DataFrame
        Table with a 'task' column and a `timeMeasure` column.
    xaxis : str
        Only used as the label of the saved file (via saveFig).
    timeMeasure : str
        Name of the column to plot; also used in the y-axis label.
    xlabel : str
        Text of the x-axis label.
    """
    enforced = df.loc[df.task == t1, timeMeasure]
    not_enforced = df.loc[df.task == t2, timeMeasure]

    if t1 == 't5':
        positions = pd.Series([33, 66, 100])
        draw = plt.plot
        scale_options = {}
        tick_values = None
    else:
        tick_values = [2, 4, 8, 16, 32]
        positions = pd.Series(tick_values)
        draw = plt.semilogx
        # NOTE(review): 'basex' is the keyword of older matplotlib releases;
        # newer ones name it 'base' - confirm against the installed version.
        scale_options = {'basex': 2}

    draw(positions, enforced, 'bo-', label = 'Access Enforced', **scale_options)
    draw(positions, not_enforced, 'r+-', label = 'Access Not Enforced', **scale_options)

    if tick_values is not None:
        # Plain numbers as tick labels on the logarithmic axis
        plt.xticks(tick_values, [str(value) for value in tick_values])

    # Fixed y range; adjust here when the measured times exceed 1.5 s
    plt.ylim(bottom=0, top=1.5)

    plt.xlabel(xlabel)
    plt.ylabel(str(timeMeasure) + ' in s')
    plt.legend(loc='best')
    # NOTE(review): set after the labels/legend were created, so it may only
    # take effect for text created afterwards - confirm.
    plt.rcParams["font.size"] = "15"

    plt.tight_layout()
    saveFig(str(xaxis))

    plt.show()
    plt.close()

In [None]:
def drawOverheadPlot(t1, t2, df, xaxis, timeMeasure, xlabel):
    """Plot the relative overhead of task `t1` over task `t2` and save it.

    The overhead is (t1 / t2 - 1) * 100 of the `timeMeasure` column, i.e. a
    percentage. For t1 == 't5' a linear x axis with the fixed positions
    33, 66, 100 is used; otherwise a base-2 logarithmic x axis with the fixed
    positions 2, 4, 8, 16, 32.

    Parameters
    ----------
    t1, t2 : str
        Values of the 'task' column selecting numerator and denominator.
    df : pandas.DataFrame
        Table with a 'task' column and a `timeMeasure` column.
    xaxis : str
        Only used in the label of the saved file ('overhead_' + xaxis).
    timeMeasure : str
        Name of the column to compare; also used in the y-axis label.
    xlabel : str
        Text of the x-axis label.
    """
    enforced = df.loc[df.task == t1, timeMeasure]
    not_enforced = df.loc[df.task == t2, timeMeasure]
    # .values makes the division positional, ignoring the differing row
    # labels of the two selections
    overhead = enforced.divide(not_enforced.values).sub(1).mul(100)

    if t1 == 't5':
        plt.plot(pd.Series([33, 66, 100]), overhead, 'g')
    else:
        log_ticks = [2, 4, 8, 16, 32]
        # NOTE(review): 'basex' is the keyword of older matplotlib releases;
        # newer ones name it 'base' - confirm against the installed version.
        plt.semilogx(pd.Series(log_ticks), overhead, 'g', basex=2)
        plt.xticks(log_ticks, [str(tick) for tick in log_ticks])

    # Overhead is shown between 0 % and 100 %
    plt.ylim(bottom=0, top=100)

    plt.xlabel(xlabel)
    plt.ylabel(str(timeMeasure) + ' Overhead (%)')

    plt.tight_layout()
    saveFig('overhead_' + str(xaxis))

    plt.show()
    plt.close()

In [None]:
# Pods experiment (t1 vs t2): only plotted when t2 results are present
if (dfResult.task == 't2').any():
    drawComparisonPlot('t1', 't2', dfResult, xaxis='pods', timeMeasure='aQMET', xlabel='Number of Pods')

In [None]:
# Pods experiment (t1 vs t2) overhead: only plotted when t2 results are present
if (dfResult.task == 't2').any():
    drawOverheadPlot('t1', 't2', dfResult, xaxis='pods', timeMeasure='aQMET', xlabel='Number of Pods')

In [None]:
# Posts experiment (t3 vs t4): only plotted when t4 results are present
if (dfResult.task == 't4').any():
    drawComparisonPlot('t3', 't4', dfResult, xaxis='posts', timeMeasure='aQMET', xlabel='Number of Posts per Pod')

In [None]:
# Posts experiment (t3 vs t4) overhead: only plotted when t4 results are present
if (dfResult.task == 't4').any():
    drawOverheadPlot('t3', 't4', dfResult, xaxis='posts', timeMeasure='aQMET', xlabel='Number of Posts per Pod')

In [None]:
# Authorisation experiment (t5 vs t6), per-query times from dfQueries.
# The guard checks dfResult for t6 rows, while the plotted data comes from
# dfQueries; 'auth' is only used as the label of the saved file.
if (dfResult.task == 't6').any():
    drawComparisonPlot('t5', 't6', dfQueries, xaxis='auth', timeMeasure='aQET', xlabel='% of Profile authorised')

In [None]:
# Authorisation experiment (t5 vs t6) overhead, per-query times from dfQueries.
# The guard checks dfResult for t6 rows, while the plotted data comes from
# dfQueries; 'auth' is only used in the label of the saved file.
if (dfResult.task == 't6').any():
    drawOverheadPlot('t5', 't6', dfQueries, xaxis='auth', timeMeasure='aQET', xlabel='% of Profile authorised')