In [38]:
import ast
import os

import numpy as np
import pandas as pd

In [39]:
# Empty result tables; the process_* parsers append one row per benchmark run.

# CPU benchmark runs.
cpu = pd.DataFrame(
    columns=[
        'filename', 'provider', 'vcpu(s)', 'device', 'instance', 'iteration',
        'events', 'runtime(ms)', 'time_per_task(ms)',
    ]
)

# GPU benchmark runs (no vcpu column).
gpu = pd.DataFrame(
    columns=[
        'filename', 'provider', 'device', 'instance', 'iteration',
        'sp_peak', 'dp_peak', 'im_peak',
    ]
)

# ML training runs on CPU instances.
cpu_ml = pd.DataFrame(
    columns=[
        'filename', 'provider', 'vcpu(s)', 'device', 'instance', 'iteration',
        'accuracy', 'runtime(ms)',
    ]
)

# ML training runs on GPU instances; same schema as cpu_ml, separate frame.
gpu_ml = pd.DataFrame(columns=list(cpu_ml.columns))

In [40]:
# Directory holding the raw benchmark output files. The parsers build paths by
# plain string concatenation (OUTPUT_DIR + filename), so keep the trailing slash.
OUTPUT_DIR = "outputs/"

In [41]:
def insert(df, record):
    """Append ``record`` to ``df`` in place as one new row.

    The new row's label is the current row count plus one, so a frame that is
    filled only through this helper ends up labelled 1, 2, 3, ...

    Args:
        df: DataFrame to extend (modified in place).
        record: Sequence of values, one per column of ``df``.
    """
    next_label = len(df) + 1
    df.loc[next_label] = record

In [42]:
def summarize_exp(exp):
    """Return the peak of the third column of one experiment table as a string.

    Args:
        exp: DataFrame whose first row holds the column names and whose
            remaining rows hold values convertible to float.

    Returns:
        The maximum of the third column, formatted with ``"%.4f"``. Rows that
        contain NaN or +/-inf in any column are dropped before the maximum is
        taken.
    """
    # Promote the first row to column names and convert everything at once.
    # astype() returns a new frame, so nothing is assigned into a slice of the
    # caller's data (the per-column assignment used to trigger
    # SettingWithCopyWarning).
    data = exp.iloc[1:].rename(columns=exp.iloc[0]).astype('float')
    data = data.replace([np.inf, -np.inf], np.nan).dropna(how='any')
    # return peak GFLOPS (throughput)
    # After the rename the labels are strings, so the third column must be
    # selected positionally with .iloc; a bare integer key on such a Series is
    # a deprecated fallback in recent pandas.
    return "%.4f" % data.max().iloc[2]
def process_gpu_benchmark(filename):
    """Parse one GPU benchmark log and append a row per CSV section to ``gpu``.

    ``filename`` is split as ``<provider>_<device>_<mode>-<instance>.<ext>`` and
    read from ``OUTPUT_DIR``. A CSV section starts on the line after one
    containing "Experiment ID" and ends at the next line containing
    "-------------". Each completed section yields one row in ``gpu``.

    Args:
        filename: Name of the log file inside ``OUTPUT_DIR``.

    Raises:
        ValueError: If ``filename`` does not split into the expected parts.
    """
    provider, device, other = filename.split('_')
    # The mode token before '-' is parsed but not recorded.
    _, other = other.split('-')
    instance = other.split('.')[0]
    iteration = 0
    in_csv = False
    rows = []
    # The context manager closes the handle even if parsing raises; the file
    # used to be opened without ever being closed.
    with open(OUTPUT_DIR + filename) as f:
        for line in f:
            if "Experiment ID" in line:
                # This header line itself is not collected; the next line
                # becomes the first row, which summarize_exp uses as names.
                in_csv = True
            elif "-------------" in line:
                if in_csv:
                    in_csv = False
                    # finish scanning a csv file
                    result = pd.DataFrame(rows)
                    # Four-column groups 0, 1 and 3 (after the leading column)
                    # fill sp_peak, dp_peak and im_peak in that order; group 2
                    # is skipped. NOTE(review): presumably half precision --
                    # confirm against the benchmark's output format.
                    record = [
                        summarize_exp(result.iloc[:, group * 4 + 1:group * 4 + 5])
                        for group in [0, 1, 3]
                    ]
                    iteration += 1
                    insert(gpu, [filename, provider, device, instance, iteration] + record)
                    rows = []
            elif in_csv:
                rows.append(line.strip().split(','))

In [43]:
def process_cpu_benchmark(filename):
    """Parse one CPU benchmark log and append a row per run to ``cpu``.

    ``filename`` is split as ``<provider>_<vcpus>_<device>-<instance>.<ext>`` and
    read from ``OUTPUT_DIR``. A "total time:" line sets the runtime of the
    current run; the following "total number of events:" line completes the
    run and triggers the insert.

    Args:
        filename: Name of the log file inside ``OUTPUT_DIR``.

    Raises:
        ValueError: If ``filename`` does not split into the expected parts, or
            a matched line does not contain a parsable number.
    """
    provider, vcpu, other = filename.split('_')
    device, other = other.split('-')
    instance = other.split('.')[0]
    iteration = 0
    # The context manager closes the handle even if parsing raises; the file
    # used to be opened without ever being closed.
    with open(OUTPUT_DIR + filename) as f:
        for line in f:
            if "total time:" in line:
                # Text after the last ':' up to the first 's' is read as
                # seconds and stored as milliseconds.
                runtime_ms = float(line.split(':')[-1].split('s')[0].strip()) * 1000
            elif "total number of events:" in line:
                events = int(line.split(':')[-1].strip())
                time_per_task = runtime_ms / events
                iteration += 1
                insert(cpu, [filename, provider, vcpu, device, instance, iteration,
                             events, runtime_ms, time_per_task])

In [44]:
def process_ml_benchmark(filename):
    """Parse one ML benchmark log and append a row per training run.

    ``filename`` is split as ``<provider>_<vcpus>_<device>_<rest>`` and read from
    ``OUTPUT_DIR``; the instance is the text between the last '-' and the
    first '.' of ``<rest>``. Rows go to ``cpu_ml`` when the device is "cpu" and
    to ``gpu_ml`` otherwise. An "Accuracy:" line sets the accuracy of the
    current run; a "Total training completion time" line completes the run.

    Args:
        filename: Name of the log file inside ``OUTPUT_DIR``.

    Raises:
        ValueError: If ``filename`` does not split into the expected parts, or
            the accuracy token is not a Python literal.
    """
    provider, vcpu, device, other = filename.split('_')
    instance = other.split('-')[-1].split('.')[0]
    # Both tables share one schema; only the destination differs by device.
    target = cpu_ml if device == "cpu" else gpu_ml
    iteration = 0
    # The context manager closes the handle even if parsing raises; the file
    # used to be opened without ever being closed.
    with open(OUTPUT_DIR + filename) as f:
        for line in f:
            if "Accuracy:" in line:
                token = line.strip().split('Accuracy: ')[1].split()[0]
                # SECURITY: this used to be eval(token). literal_eval yields the
                # same value for numeric literals but cannot execute code that
                # ends up in a log file.
                accuracy = ast.literal_eval(token)
            elif "Total training completion time" in line:
                iteration += 1
                # Second-to-last whitespace-separated token, stored as the raw
                # string; convert downstream before numeric aggregation.
                runtime = line.split()[-2]
                insert(target, [filename, provider, vcpu, device, instance,
                                iteration, accuracy, runtime])

In [45]:
# Route every entry in OUTPUT_DIR to its parser based on substrings of its name.
# "ml" is tested first because ML logs also carry "cpu"/"gpu" in their names.
# sorted() pins the processing order, and therefore the row order and labels of
# the result tables; os.listdir returns entries in arbitrary order.
for f in sorted(os.listdir(OUTPUT_DIR)):
    if "ml" in f:
        process_ml_benchmark(f)
    elif "cpu" in f:
        process_cpu_benchmark(f)
    elif "gpu" in f:
        process_gpu_benchmark(f)
    else:
        print("Unrecognized Filename: %s"%f)

In [46]:
# Preview the parsed GPU benchmark rows.
# NOTE(review): the saved output below lists iteration 0 on every row, but
# process_gpu_benchmark increments the counter before inserting, so a fresh run
# starts at 1. The output looks stale; re-run the notebook to refresh it.
gpu.head()

Unnamed: 0,filename,provider,device,instance,iteration,sp_peak,dp_peak,im_peak
1,amazon_gpu_alt-2.out,amazon,gpu,2,0,3451.88,1375.43,692.15
2,amazon_gpu_alt-2.out,amazon,gpu,2,0,3449.62,1375.86,690.75
3,amazon_gpu_alt-2.out,amazon,gpu,2,0,3455.28,1375.69,689.31
4,amazon_gpu_alt-2.out,amazon,gpu,2,0,3451.55,1375.27,691.22
5,amazon_gpu_alt-2.out,amazon,gpu,2,0,3469.8,1360.48,691.87


In [144]:
# Preview the parsed GPU ML-training rows.
# NOTE(review): this cell's execution count (144) is out of sequence with the
# cells around it; Restart & Run All to confirm the output still reproduces.
gpu_ml.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,accuracy,runtime(ms)
1,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,1,0.987,114515
2,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,2,0.9866,113274
3,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,3,0.9864,113262
4,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,4,0.987,112760
5,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,5,0.9863,113149


In [145]:
# Preview the parsed CPU benchmark rows.
# NOTE(review): this cell's execution count (145) is out of sequence with the
# cells around it; Restart & Run All to confirm the output still reproduces.
cpu.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,events,runtime(ms),time_per_task(ms)
1,amazon_4_cpu-3.out,amazon,4,cpu,3,1,64,10200.3,159.379688
2,amazon_4_cpu-3.out,amazon,4,cpu,3,2,64,10190.9,159.232812
3,amazon_4_cpu-3.out,amazon,4,cpu,3,3,64,10198.4,159.35
4,amazon_4_cpu-3.out,amazon,4,cpu,3,4,64,10197.0,159.328125
5,amazon_4_cpu-3.out,amazon,4,cpu,3,5,64,10198.3,159.348437


In [47]:
# Preview the parsed CPU ML-training rows.
cpu_ml.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,accuracy,runtime(ms)
1,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,1,0.9876,240842
2,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,2,0.9868,240499
3,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,3,0.9852,243172
4,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,4,0.9868,245767
5,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,5,0.9871,242634
