In [2]:
import pandas as pd
import numpy as np
import os

In [24]:
# Price lookup table for CPU instances: one row per (provider, vCPU count).
# NOTE(review): the unit of `price` is not stated in this notebook
# (presumably USD per hour) -- confirm against the providers' price lists.
cpu_price = pd.DataFrame(
    columns=['provider', 'vcpu(s)', 'price'],
    data=[
        ('amazon', 4, 0.17),
        ('amazon', 8, 0.34),
        ('amazon', 16, 0.68),
        ('amazon', 36, 1.53),
        ('google', 4, 0.2088),
        ('google', 8, 0.4176),
        ('google', 16, 0.8325),
        ('microsoft', 4, 0.17),
        ('microsoft', 8, 0.34),
    ],
)

In [25]:
# Show the full CPU price table as this cell's output.
cpu_price

Unnamed: 0,provider,vcpu(s),price
0,amazon,4,0.17
1,amazon,8,0.34
2,amazon,16,0.68
3,amazon,36,1.53
4,google,4,0.2088
5,google,8,0.4176
6,google,16,0.8325
7,microsoft,4,0.17
8,microsoft,8,0.34


In [88]:
# Price lookup table for GPU instances: one row per provider.
# NOTE(review): google and microsoft are priced at 0 here, which looks like a
# placeholder rather than a real price -- confirm before comparing costs.
gpu_price = pd.DataFrame(
    columns=['provider', 'price'],
    data=[
        ('amazon', 0.9),
        ('google', 0),
        ('microsoft', 0),
    ],
)

In [12]:
# Directory holding the raw benchmark output files. The path is relative and
# keeps its trailing slash because the parsers build file paths by plain
# string concatenation (OUTPUT_DIR + filename).
OUTPUT_DIR = "outputs/"

In [13]:
def insert(df, record):
    """Append ``record`` to ``df`` in place as one new row.

    The row is stored under the index label ``<current row count> + 1``, so a
    frame that starts empty ends up labelled 1, 2, 3, ...  ``record`` must
    supply one value per column of ``df``.  Returns None.
    """
    df.loc[len(df) + 1] = record

In [14]:
def summarize_exp(exp):
    """Return the peak value of the third column of one experiment table.

    Parameters
    ----------
    exp : pandas.DataFrame
        Raw table of strings whose first row holds the column names and whose
        remaining rows hold numeric text.  It needs at least three columns.

    Returns
    -------
    str
        The maximum of the third column, formatted with four decimals
        (``"%.4f"``).  Rows containing an infinite or missing value in any
        column are ignored.  If no usable row remains the result is ``"nan"``.
    """
    # Promote the first row to column names and keep only the data rows.
    exp = exp.rename(columns=exp.iloc[0]).iloc[1:, :]
    # Convert the whole frame at once; this avoids assigning column by column
    # into a slice of the caller's frame.
    exp = exp.astype('float')
    exp = exp.replace([np.inf, -np.inf], np.nan).dropna(how='any')
    # return peak GFLOPS (throughput)
    # The columns are labelled by name after the rename, so the third one has
    # to be selected by position with .iloc; plain [2] on a label-indexed
    # Series is deprecated positional fallback.
    return "%.4f" % exp.max().iloc[2]
def process_gpu_benchmark(filename):
    """Parse one GPU benchmark log and append its results to the global ``gpu`` table.

    ``filename`` must look like ``<provider>_<device>_<mode>-<instance>.<ext>``
    (exactly two ``_`` and exactly one ``-``); any other shape raises
    ``ValueError`` from the tuple unpacking.  The file is read from
    ``OUTPUT_DIR``.

    The log is scanned for tables: a line containing "Experiment ID" starts a
    table, the following lines are split on commas and collected, and a line
    containing "-------------" ends it.  For every completed table one row is
    appended to ``gpu`` via ``insert``.

    Returns None; the result is the side effect on ``gpu``.
    """
    # Context manager so the handle is closed even when parsing raises.
    with open(OUTPUT_DIR + filename) as f:
        provider, device, other = filename.split('_')
        # The mode part is unpacked (enforcing the name shape) but not recorded.
        _mode, other = other.split('-')
        instance = other.split('.')[0]
        iteration = 0
        in_table = False
        rows = []
        for line in f:
            if "Experiment ID" in line:
                # The marker line itself is not collected; collection starts
                # with the next line.
                in_table = True
            elif "-------------" in line:
                if in_table:
                    in_table = False
                    # finished scanning one table
                    result = pd.DataFrame(rows)
                    # Each i selects a block of four columns starting at
                    # column i*4+1.  Blocks 0, 1 and 3 fill the sp_peak,
                    # dp_peak and im_peak columns of ``gpu``; block 2 is skipped.
                    record = [summarize_exp(result.iloc[:, i*4+1:i*4+5]) for i in [0, 1, 3]]
                    iteration += 1
                    insert(gpu, [filename, provider, device, instance, iteration] + record)
                    rows = []
            elif in_table:
                rows.append(line.strip().split(','))

In [15]:
def process_cpu_benchmark(filename):
    """Parse one CPU benchmark log and append its results to the global ``cpu`` table.

    ``filename`` must look like ``<provider>_<vcpu>_<device>-<instance>.<ext>``
    (exactly two ``_`` and exactly one ``-``); any other shape raises
    ``ValueError`` from the tuple unpacking.  The file is read from
    ``OUTPUT_DIR``.

    Each "total time:" line sets the current runtime.  Each following
    "total number of events:" line closes one iteration and appends a row
    (events, runtime, runtime per event) to ``cpu`` via ``insert``.  An events
    line that appears before any time line raises ``UnboundLocalError``.

    Returns None; the result is the side effect on ``cpu``.
    """
    # Context manager so the handle is closed even when parsing raises.
    with open(OUTPUT_DIR + filename) as f:
        provider, vcpu, other = filename.split('_')
        device, other = other.split('-')
        instance = other.split('.')[0]
        iteration = 0
        for line in f:
            if "total time:" in line:
                # Take the text after the last ':' up to the first 's' and
                # scale by 1000; the value is stored in the 'runtime(ms)' column.
                runtime_ms = float(line.split(':')[-1].split('s')[0].strip()) * 1000
            elif "total number of events:" in line:
                events = int(line.split(':')[-1].strip())
                time_per_task = runtime_ms / events
                iteration += 1
                insert(cpu, [filename, provider, vcpu, device, instance, iteration, events, runtime_ms, time_per_task])

In [16]:
def process_ml_benchmark(filename):
    """Parse one ML benchmark log and append its results to ``cpu_ml`` or ``gpu_ml``.

    ``filename`` must split into exactly four ``_``-separated parts,
    ``<provider>_<vcpu>_<device>_<rest>``; any other shape raises
    ``ValueError``.  The instance id is the text between the last ``-`` and
    the following ``.`` in ``<rest>``.  The file is read from ``OUTPUT_DIR``.

    Each "Accuracy:" line sets the current accuracy.  Each following
    "Total training completion time" line closes one iteration and appends a
    row via ``insert`` to ``cpu_ml`` when the device is "cpu", otherwise to
    ``gpu_ml``.  A completion line that appears before any accuracy line
    raises ``UnboundLocalError``.

    Returns None; the result is the side effect on the target table.
    """
    # Context manager so the handle is closed even when parsing raises.
    with open(OUTPUT_DIR + filename) as f:
        provider, vcpu, device, other = filename.split('_')
        instance = other.split('-')[-1].split('.')[0]
        iteration = 0
        for line in f:
            if "Accuracy:" in line:
                # NOTE(review): eval() executes text taken from the log file.
                # If the token is always a plain number, float() would be the
                # safe replacement -- confirm the log format before changing.
                accuracy = eval(line.strip().split('Accuracy: ')[1].split()[0])
            elif "Total training completion time" in line:
                iteration += 1
                # Second-to-last whitespace-separated token, kept as a string;
                # it is converted to float later when the table is summarized.
                runtime = line.split()[-2]
                target_table = cpu_ml if device == "cpu" else gpu_ml
                insert(target_table, [filename, provider, vcpu, device, instance, iteration, accuracy, runtime])

In [63]:
# Result tables, filled in place by the process_* parsers (one row per parsed
# benchmark iteration). They are re-created here so that re-running this cell
# starts from empty tables instead of appending duplicates.
cpu = pd.DataFrame(columns=['filename', 'provider', 'vcpu(s)', 'device', 'instance', 'iteration', 'events', 'runtime(ms)', 'time_per_task(ms)'])
gpu = pd.DataFrame(columns=['filename', 'provider', 'device', 'instance', 'iteration', 'sp_peak', 'dp_peak', 'im_peak'])
cpu_ml = pd.DataFrame(columns=['filename', 'provider', 'vcpu(s)', 'device', 'instance', 'iteration', 'accuracy', 'runtime(ms)'])
gpu_ml = pd.DataFrame(columns=['filename', 'provider', 'vcpu(s)', 'device', 'instance', 'iteration', 'accuracy', 'runtime(ms)'])

# os.listdir returns names in arbitrary order; sorting makes the row order of
# the tables reproducible across runs and machines.
for f in sorted(os.listdir(OUTPUT_DIR)):
    # "ml" is tested first because ML file names also contain "cpu" or "gpu".
    if "ml" in f:
        process_ml_benchmark(f)
    elif "cpu" in f:
        process_cpu_benchmark(f)
    elif "gpu" in f:
        process_gpu_benchmark(f)
    else:
        print("Unrecognized Filename: %s"%f)

Unrecognized Filename: ocl


In [81]:
def summarize_cpu(df, target):
    """Attach prices to CPU results and add baseline-normalized runtime and cost.

    Parameters
    ----------
    df : pandas.DataFrame
        Result table with at least 'provider', 'vcpu(s)' and ``target``
        columns.  NOTE: 'vcpu(s)' and ``target`` are cast to int / float on
        this very frame, so the caller sees the converted dtypes afterwards.
    target : str
        Name of the runtime column to normalize.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``df`` outer-merged with the global ``cpu_price`` table
        (price rows without results are kept, with missing values), plus:

        * 'normalized_runtime' -- ``target`` divided by the baseline mean,
        * 'cost_per_task'      -- 'normalized_runtime' times 'price',
        * 'normalized_cost'    -- 'cost_per_task' divided by its baseline mean.

        The baseline is the first (provider, vcpu(s)) group in groupby's
        sorted key order.
    """
    df['vcpu(s)'] = df['vcpu(s)'].astype('int')
    df[target] = df[target].astype('float')
    df = df.merge(cpu_price, how='outer')

    group_keys = ['provider', 'vcpu(s)']
    # The group means are indexed by (provider, vcpu) labels, so the first
    # group has to be picked by position with .iloc; plain [0] on such a
    # Series is deprecated positional fallback.
    baseline_runtime = float(df.groupby(group_keys)[target].mean().iloc[0])
    df['normalized_runtime'] = df[target] / baseline_runtime
    df['cost_per_task'] = df['normalized_runtime'] * df['price']
    baseline_cost = float(df.groupby(group_keys)['cost_per_task'].mean().iloc[0])
    df['normalized_cost'] = df['cost_per_task'] / baseline_cost
    return df

In [102]:
def summarize_gpu(df, target='runtime(ms)'):
    """Attach prices to GPU results and add baseline-normalized metric and cost.

    Parameters
    ----------
    df : pandas.DataFrame
        Result table with at least a 'provider' column and the ``target``
        column(s).  It is not modified; the merge creates a new frame.
    target : str or list of str
        Column(s) to normalize.  For a single name the added columns are
        'normalized_runtime', 'cost_per_task' and 'normalized_cost'.  For a
        list, each name ``t`` adds 'normalized_<t>', 'cost_per_perf_<t>' and
        'normalized_cost_<t>'.

    Returns
    -------
    pandas.DataFrame
        ``df`` outer-merged with the global ``gpu_price`` table (providers
        without results are kept, with missing values) plus the columns
        described above.  The baseline for every normalization is the first
        provider group in groupby's sorted key order.
    """
    df = df.merge(gpu_price, how='outer')

    # One (source, normalized, cost, normalized-cost) column spec per metric;
    # the output names differ between the single-target and list forms.
    if isinstance(target, list):
        specs = [(t, 'normalized_%s'%t, 'cost_per_perf_%s'%t, 'normalized_cost_%s'%t)
                 for t in target]
    else:
        specs = [(target, 'normalized_runtime', 'cost_per_task', 'normalized_cost')]

    for source, normalized, cost, normalized_cost in specs:
        df[source] = df[source].astype('float')
        # The group means are indexed by provider name, so the first group has
        # to be picked by position with .iloc; plain [0] on such a Series is
        # deprecated positional fallback.
        df[normalized] = df[source] / df.groupby(['provider'])[source].mean().iloc[0]
        df[cost] = df[normalized] * df['price']
        df[normalized_cost] = df[cost] / float(df.groupby(['provider'])[cost].mean().iloc[0])
    return df

In [98]:
# Price-normalized summary of the GPU ML runs, using summarize_gpu's default
# target column 'runtime(ms)'.
summarized_gpu_ml = summarize_gpu(gpu_ml)

In [84]:
# Price-normalized summary of the CPU benchmark, based on the time per task.
summarized_cpu = summarize_cpu(cpu, 'time_per_task(ms)')

In [85]:
# Price-normalized summary of the CPU ML runs, based on the total runtime.
summarized_cpu_ml = summarize_cpu(cpu_ml, 'runtime(ms)')

In [103]:
# Price-normalized summary of the GPU benchmark, one column set per peak metric.
summarized_gpu = summarize_gpu(gpu, ['sp_peak','dp_peak','im_peak'])

In [104]:
# Preview the first rows of the GPU benchmark summary.
summarized_gpu.head()

Unnamed: 0,filename,provider,device,instance,iteration,sp_peak,dp_peak,im_peak,price,normalized_sp_peak,cost_per_perf_sp_peak,normalized_cost_sp_peak,normalized_dp_peak,cost_per_perf_dp_peak,normalized_cost_dp_peak,normalized_im_peak,cost_per_perf_im_peak,normalized_cost_im_peak
0,amazon_gpu_alt-2.out,amazon,gpu,2,1,3451.88,1375.43,692.15,0.9,1.007785,0.907006,1.007785,0.998132,0.898319,0.998132,0.997601,0.897841,0.997601
1,amazon_gpu_alt-2.out,amazon,gpu,2,2,3449.62,1375.86,690.75,0.9,1.007125,0.906412,1.007125,0.998444,0.898599,0.998444,0.995583,0.896025,0.995583
2,amazon_gpu_alt-2.out,amazon,gpu,2,3,3455.28,1375.69,689.31,0.9,1.008777,0.9079,1.008777,0.998321,0.898488,0.998321,0.993507,0.894157,0.993507
3,amazon_gpu_alt-2.out,amazon,gpu,2,4,3451.55,1375.27,691.22,0.9,1.007688,0.90692,1.007688,0.998016,0.898214,0.998016,0.99626,0.896634,0.99626
4,amazon_gpu_alt-2.out,amazon,gpu,2,5,3469.8,1360.48,691.87,0.9,1.013017,0.911715,1.013017,0.987283,0.888555,0.987283,0.997197,0.897477,0.997197


In [99]:
# Preview the first rows of the GPU ML summary.
summarized_gpu_ml.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,accuracy,runtime(ms),normalized_runtime,price,cost_per_task,normalized_cost
0,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,1,0.987,114515.0,1.023552,0.9,0.921196,1.023552
1,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,2,0.9866,113274.0,1.012459,0.9,0.911213,1.012459
2,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,3,0.9864,113262.0,1.012352,0.9,0.911117,1.012352
3,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,4,0.987,112760.0,1.007865,0.9,0.907079,1.007865
4,amazon_4_gpu_ml-1.out,amazon,4,gpu,1,5,0.9863,113149.0,1.011342,0.9,0.910208,1.011342


In [86]:
# Preview the first rows of the CPU benchmark summary.
summarized_cpu.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,events,runtime(ms),time_per_task(ms),price,normalized_runtime,cost_per_task,normalized_cost
0,amazon_4_cpu-3.out,amazon,4,cpu,3,1,64,10200.3,159.379688,0.17,1.00007,0.170012,1.00007
1,amazon_4_cpu-3.out,amazon,4,cpu,3,2,64,10190.9,159.232812,0.17,0.999149,0.169855,0.999149
2,amazon_4_cpu-3.out,amazon,4,cpu,3,3,64,10198.4,159.35,0.17,0.999884,0.16998,0.999884
3,amazon_4_cpu-3.out,amazon,4,cpu,3,4,64,10197.0,159.328125,0.17,0.999747,0.169957,0.999747
4,amazon_4_cpu-3.out,amazon,4,cpu,3,5,64,10198.3,159.348437,0.17,0.999874,0.169979,0.999874


In [87]:
# Preview the first rows of the CPU ML summary.
summarized_cpu_ml.head()

Unnamed: 0,filename,provider,vcpu(s),device,instance,iteration,accuracy,runtime(ms),price,normalized_runtime,cost_per_task,normalized_cost
0,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,1,0.9876,240842.0,0.17,0.980479,0.166681,0.980479
1,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,2,0.9868,240499.0,0.17,0.979082,0.166444,0.979082
2,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,3,0.9852,243172.0,0.17,0.989964,0.168294,0.989964
3,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,4,0.9868,245767.0,0.17,1.000528,0.17009,1.000528
4,amazon_4_cpu_ml-3.out,amazon,4,cpu,3,5,0.9871,242634.0,0.17,0.987774,0.167922,0.987774
