# Output Processor

In [None]:
import os
import json
from pprint import pprint # Delete
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Number of highest and number of lowest values to discard
# (passed to dropMinAndMax before each mean is computed)
n_drop = 1

In [None]:
# Load the experiment configuration and echo its 'data' section
with open('config.json', 'r') as configFile:
    conf = json.load(configFile)
    pprint(conf['data'])

data = conf['data']
queries = conf['queries']
runs = conf['runs']

# One output directory per (task, sub-task, query) combination,
# in the order the configuration lists them
queryDirs = [
    './src/output/' + task + '/' + subT + '/' + q + '/'
    for task in data
    for subT in data[task]
    for q in queries
]


In [None]:
## General functions

# Dropping n * (min and max) from runtimes
def dropMinAndMax(n_drop, timeList):
    """Discard the n_drop largest and n_drop smallest values of a Series.

    Each pass removes exactly one occurrence of the current maximum and then
    one occurrence of the current minimum, so ties are only removed once per
    pass.

    Args:
        n_drop: Number of maxima and minima to remove. Values <= 0 return
            timeList unchanged.
        timeList: pandas Series of measurements.

    Returns:
        The remaining values in their original order. The index is
        positional as of the last removal, so it may contain a gap.

    Raises:
        ValueError: If timeList holds fewer than 2 * n_drop values.
    """
    # Fail early with a readable message; otherwise argmax/argmin raise an
    # opaque "empty sequence" error part-way through the trimming.
    if n_drop > 0 and len(timeList) < 2 * n_drop:
        raise ValueError(
            'Cannot drop n_drop=' + str(n_drop) + ' minima and maxima from '
            + str(len(timeList)) + ' value(s)'
        )
    for _ in range(n_drop):
        # Resetting the index makes the positional argmax/argmin result a
        # valid label for drop().
        timeList = timeList.reset_index(drop=True)
        timeList = timeList.drop(timeList.values.argmax())
        timeList = timeList.reset_index(drop=True)
        timeList = timeList.drop(timeList.values.argmin())
    return timeList

# Function for saving figures
def saveFig(label):
    """Save the current matplotlib figure as ./results/<label>_plot.png.

    The ./results/ directory is created when it does not exist yet.

    Args:
        label: Prefix of the output file name.

    Raises:
        OSError: If the results directory cannot be created.
    """
    path = './results/'
    # exist_ok keeps an existing directory from being an error while letting
    # real failures (e.g. missing permissions) surface instead of being
    # reported as "already exists".
    os.makedirs(path, exist_ok=True)
    plt.savefig(path + label + '_plot.png')
    

In [None]:
# Creating general dataframe with all results and experimental details

# Fields copied from each run's output file
resultColumns = ['task', 'subT', 'queryNr', 'run', 'time', 'result']
# Fields copied from the configuration entry of the run's task / sub-task
experimentColumns = ['pods', 'posts', 'auths', 'acEnforce']

# Reading in of output files.
# Rows are collected in a list and turned into a dataframe once; growing the
# dataframe cell by cell copies it on every new row and forces object dtype.
rows = []
for directory in queryDirs:
    for r in range(runs):
        path = directory + 'run' + str(r + 1) + '.json'
        with open(path, 'r') as f:
            output = json.load(f)
        # Retrieving result data from output file
        row = {column: output[column] for column in resultColumns}
        # Retrieving experiment data from configuration file
        experiment = data[output['task']][output['subT']]
        for column in experimentColumns:
            row[column] = experiment[column]
        rows.append(row)

dfResult = pd.DataFrame(rows, columns=resultColumns + experimentColumns)

# Calculation of QET and aQEToA
for task in data:
    for subT in data[task]:
        # Restrict to the current task as well, so that equal sub-task names
        # under different tasks are never mixed.
        inSubTask = (dfResult.task == task) & (dfResult.subT == subT)

        for q in queries:
            # calculate query execution time for each query
            inQuery = inSubTask & (dfResult.queryNr == q)
            timeList = dropMinAndMax(n_drop, dfResult.loc[inQuery, 'time'])
            dfResult.loc[inQuery, 'QET'] = round(timeList.mean(), 0)

        # calculate average query execution time over all queries
        # NOTE(review): QET is repeated on every run row of a query, so this
        # trims single rows rather than whole queries - confirm this is intended.
        timeList = dropMinAndMax(n_drop, dfResult.loc[inSubTask, 'QET'])
        dfResult.loc[inSubTask, 'aQEToA'] = round(timeList.mean(), 0)

# Dataframe including all data
# The results directory may not exist yet at this point (saveFig only creates
# it later), so make sure it is there before writing.
os.makedirs('./results/', exist_ok=True)
dfResult.to_csv('./results/results_table.csv')
dfResult.head(5)

## Personalised plots

In [None]:
# aQEToA dataframe: keep only the experiment description and the aQEToA
# value, then remove the repeated rows
summaryColumns = ['task', 'subT', 'aQEToA', 'pods', 'posts', 'auths', 'acEnforce']
dfResult = dfResult[summaryColumns].drop_duplicates().reset_index(drop=True)
dfResult

In [None]:
def drawOverheadPlot(t1, t2, xlabel):
    """Plot aQEToA of two tasks against one experiment parameter.

    Reads the module-level dfResult, saves the figure through saveFig(xlabel)
    and then shows it.

    Args:
        t1: Task whose rows are drawn with the label 'AccEnforced'.
        t2: Task whose rows are drawn with the label 'AccNotEnforced'.
        xlabel: Name of the dfResult column used for the x axis; also used
            as the label of the saved figure.
    """
    enforced = dfResult[dfResult.task == t1]
    notEnforced = dfResult[dfResult.task == t2]

    # Each series is drawn against the x values of its own rows, so the two
    # tasks do not need the same number or order of measurements.
    plt.plot(enforced[xlabel], enforced.aQEToA, label='AccEnforced')
    plt.plot(notEnforced[xlabel], notEnforced.aQEToA, label='AccNotEnforced')
    plt.ylim(bottom=0)

    plt.xlabel(xlabel.capitalize())
    plt.ylabel('aQEToA in ms')
    # NOTE(review): 'Increse' looks like a typo for 'Increase'; the string is
    # left as is so the generated plots do not change.
    plt.title('Average Query Execution Time over all Queries for Increse in ' + xlabel.capitalize())
    plt.legend(loc='best')
    saveFig(xlabel)

    plt.show()

In [None]:
# Overhead plot for tasks 't1' and 't2' over the 'pods' column
drawOverheadPlot('t1', 't2', 'pods')

In [None]:
# Overhead plot for tasks 't3' and 't4' over the 'posts' column
drawOverheadPlot('t3', 't4', 'posts')

In [None]:
def drawAuthPlot(task, xlabel):
    """Plot aQEToA of one task against fixed authorisation fractions.

    Reads the module-level dfResult, saves the figure through saveFig(xlabel)
    and then shows it.

    Args:
        task: Task whose aQEToA values are plotted.
        xlabel: Label handed to saveFig for the output file name; it is not
            used to select the x values.
    """
    # The x positions are hard-coded, so the selected task is expected to
    # provide exactly three aQEToA values.
    authorisedShare = (0.33, 0.66, 1)
    taskRows = dfResult[dfResult.task == task]

    plt.plot(authorisedShare, taskRows.aQEToA)
    plt.ylim(bottom=0)

    plt.title('Average Query Execution Time over all Queries for Increse in Authorisations')
    plt.xlabel('% of Profile authorised')
    plt.ylabel('aQEToA in ms')

    saveFig(xlabel)

    plt.show()

In [None]:
# Authorisation plot for task 't5'; the figure is saved with the label 'auths'
drawAuthPlot('t5', 'auths')