# Import packages to parse results

In [1]:
from glob import glob
from tabulate import tabulate
from IPython.display import HTML
import pandas as pd
import json

## Log file locations relative to notebook

### Create a dataframe with all json data

In [2]:
def create_dataframe(logs):
    """Load every JSON log matching a glob pattern into a single DataFrame.

    Each matched file is parsed as JSON and the object stored under its
    top-level 'saaf' key becomes one row of the result. The 'cpuInfo'
    entry is dropped when present because it was messing up the dataframe.

    Parameters
    ----------
    logs : str
        Glob pattern selecting the log files to load.

    Returns
    -------
    pandas.DataFrame
        One row per matched file, ordered by sorted file path, with a
        fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the pattern matches no files.
    KeyError
        If a matched file has no top-level 'saaf' key.
    """
    # glob() returns paths in arbitrary, OS-dependent order; sort so the row
    # order is reproducible across runs and machines.
    paths = sorted(glob(logs))
    if not paths:
        # Fail with the offending pattern instead of pandas' generic
        # "No objects to concatenate" error.
        raise ValueError(f"No log files match pattern: {logs!r}")

    records = []
    for path in paths:
        with open(path) as f:
            record = json.load(f)['saaf']
        # cpuInfo is messing up the dataframe... remove for now.
        # Tolerate logs that do not carry the key at all.
        record.pop('cpuInfo', None)
        records.append(record)

    # Build the frame once from all records rather than one frame per file.
    return pd.DataFrame(records)

### For BWA, we have 3 different clusters and 1 method with two different payloads (normal, tumor), each run with multiple concurrent calls

In [3]:
# Display label -> log directory name for each benchmarked cluster.
# The first two entries are kOps clusters (running on AWS); the last one is
# an AWS EKS cluster. Each label must describe the same node layout as the
# directory it points at (the two kOps labels were previously swapped).
clusters = {
    'kOps 1x 72vCPU 144Gb': 'kops-multi-tenant-spot-1x-72vcpu-144gb',
    'kOps 8x 8vCPU 16Gb': 'kops-multi-tenant-spot-8x-8vcpu-16gb',
    'EKS 8x 8vCPU 16Gb': 'eks-multi-tenant-spot-8x-8vcpu-16gb',
}
# Payload names; each is matched against the log file names via "*{m}*".
methods = ('normal', 'tumor')
# Table header row: the concurrency column followed by one column per payload.
headers = ['concurrent calls', *methods]
concurrent_calls = (2, 3, 4, 5)

# sebs_df[cluster label][payload][concurrent calls] -> DataFrame of matching logs
sebs_df = {}
for k, v in clusters.items():
    sebs_df[k] = {}
    for m in methods:
        sebs_df[k][m] = {}
        for c in concurrent_calls:
            sebs_df[k][m][c] = create_dataframe(
                f"../logs/openfaas/{v}/bwa/concurrent/{c}/*{m}*"
            )

# Using tabulate to create tables for results

In [4]:
# Mean runtime: for every cluster, render an HTML table with one row per
# concurrency level and one column per payload.
table = {}
for k in clusters:
    per_method = sebs_df[k]
    rows = [
        [n_calls, *(per_method[m][n_calls]['runtime'].mean() for m in methods)]
        for n_calls in concurrent_calls
    ]
    html_table = tabulate(rows, headers=headers, tablefmt='html')
    table[k] = html_table
    display(HTML(f'<h3>Mean runtimes (ms), cluster <b>{k}</b></h3>'))
    display(html_table)
    print()

concurrent calls,normal,tumor
2,87631.0,102281.0
3,84378.3,99910.7
4,92541.0,108885.0
5,89524.2,107132.0





concurrent calls,normal,tumor
2,173006.0,90881
3,90374.3,130124
4,135388.0,112562
5,121151.0,105943





concurrent calls,normal,tumor
2,112854,110465
3,100482,114089
4,103851,120964
5,125142,134317





In [5]:
# Standard deviation of runtime: for every cluster, render an HTML table with
# one row per concurrency level and one column per payload.
# Series.std() is called with its defaults (pandas' sample standard deviation).
table = {}
for k in clusters:
    per_method = sebs_df[k]
    rows = [
        [n_calls, *(per_method[m][n_calls]['runtime'].std() for m in methods)]
        for n_calls in concurrent_calls
    ]
    html_table = tabulate(rows, headers=headers, tablefmt='html')
    table[k] = html_table
    display(HTML(f'<h3>Std runtimes (ms), cluster <b>{k}</b></h3>'))
    display(html_table)
    print()

concurrent calls,normal,tumor
2,851.357,302.642
3,2398.95,981.55
4,2207.17,4258.64
5,6165.6,6550.59





concurrent calls,normal,tumor
2,630.739,748.119
3,11162.1,9493.27
4,37818.2,16702.6
5,24758.5,23521.5





concurrent calls,normal,tumor
2,33944.2,14721.6
3,25133.2,23536.6
4,25452.6,29513.4
5,25065.7,36342.2





In [6]:
# Coefficient of variation (CV) of runtime: std / mean, per cluster.
# CV is a ratio of two millisecond quantities and is therefore dimensionless,
# so the heading must not carry a "(ms)" unit.
table = dict()
for k in clusters:
    data = []
    for value in concurrent_calls:
        row = [value]
        for m in methods:
            # Select the runtime series once and derive both statistics from it.
            runtime = sebs_df[k][m][value]['runtime']
            row.append(runtime.std() / runtime.mean())
        data.append(row)
    table[k] = tabulate(data, tablefmt='html', headers=headers)
    display(HTML(f'<h3>CV of runtimes (std/mean, dimensionless), cluster <b>{k}</b></h3>'))
    display(table[k])
    print()

concurrent calls,normal,tumor
2,0.00971524,0.00295892
3,0.0284309,0.00982428
4,0.0238507,0.0391112
5,0.0688708,0.0611448





concurrent calls,normal,tumor
2,0.00364577,0.00823185
3,0.12351,0.0729557
4,0.279332,0.148386
5,0.204361,0.222019





concurrent calls,normal,tumor
2,0.300781,0.133269
3,0.250127,0.206301
4,0.245087,0.243985
5,0.200297,0.27057



