In [5]:
import pandas as pd
import glob

In [6]:
# Load every per-instance results CSV into a dict keyed by instance name.
# Files are expected to be named ``<RESULTS_DIR>/ind_exp_results_<instance>.csv``.
RESULTS_DIR = './results'
RESULTS_FILE_PREFIX = 'ind_exp_results_'
RESULTS_FILE_SUFFIX = '.csv'


def instance_name_from_path(results_fp):
    """Return the instance name embedded in a results file path.

    The name is whatever sits between the last occurrence of
    ``RESULTS_FILE_PREFIX`` and the trailing ``RESULTS_FILE_SUFFIX``, so the
    result does not depend on the directory part of the path or on the path
    separator in use.

    Args:
        results_fp: Path string such as
            ``'./results/ind_exp_results_g5.xlarge.csv'``.

    Returns:
        The instance name as a string, e.g. ``'g5.xlarge'``.
    """
    tail = results_fp.rpartition(RESULTS_FILE_PREFIX)[2]
    if tail.endswith(RESULTS_FILE_SUFFIX):
        tail = tail[:-len(RESULTS_FILE_SUFFIX)]
    return tail


# sorted(): the order glob returns matches in depends on the file system;
# sorting keeps the dict (and everything derived from it) reproducible.
results_fps = sorted(glob.glob(f'{RESULTS_DIR}/{RESULTS_FILE_PREFIX}*{RESULTS_FILE_SUFFIX}'))
results_d = {}
for results_fp in results_fps:
    instance_name = instance_name_from_path(results_fp)
    results_d[instance_name] = pd.read_csv(results_fp)

In [7]:
# Instance names in the order their results were inserted into results_d.
instance_names = [*results_d]

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the g5.xlarge
# results table. Raises KeyError if no results file for this instance was loaded.
results_d['g5.xlarge'].describe()

Unnamed: 0,batch_size,steps,steps_per_loop,cost_per_hour,steps_per_second,total_time,total_cost
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,22.166667,2725.0,27.0,1.01,417.725569,50.137074,0.014066
std,37.136255,2326.454664,43.296752,0.0,1009.9798,64.725784,0.018159
min,2.0,300.0,-1.0,1.01,21.530674,1.53405,0.00043
25%,3.5,525.0,-1.0,1.01,24.722375,9.81404,0.002753
50%,6.0,2800.0,4.5,1.01,45.017472,18.629175,0.005227
75%,14.0,5000.0,32.5,1.01,67.507201,65.741118,0.018444
max,128.0,5000.0,100.0,1.01,3259.347168,215.388736,0.060429


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the g6.xlarge
# results table. Raises KeyError if no results file for this instance was loaded.
results_d['g6.xlarge'].describe()

Unnamed: 0,batch_size,steps,steps_per_loop,cost_per_hour,steps_per_second,total_time,total_cost
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,22.166667,2725.0,27.0,0.8,435.461128,52.682219,0.011707
std,37.136255,2326.454664,43.296752,1.134102e-16,1089.907207,79.878866,0.017751
min,2.0,300.0,-1.0,0.8,18.390761,1.273351,0.000283
25%,3.5,525.0,-1.0,0.8,19.558263,7.779094,0.001729
50%,6.0,2800.0,4.5,0.8,58.850505,23.485566,0.005219
75%,14.0,5000.0,32.5,0.8,98.76182,50.435851,0.011208
max,128.0,5000.0,100.0,0.8,3926.647949,256.122035,0.056916


In [10]:
def get_exp_name(ser):
    """Build the identifier string for one experiment row.

    Args:
        ser: Any object indexable by the keys ``model``, ``is_train``,
            ``batch_size``, ``steps`` and ``steps_per_loop`` (for example a
            row of a results table).

    Returns:
        A string of the form
        ``exp_<model>_<train|inference>_<batch_size>_<steps>_<steps_per_loop>``,
        where the second field is ``train`` when ``is_train`` is truthy and
        ``inference`` otherwise.
    """
    model = ser["model"]
    if ser["is_train"]:
        mode = "train"
    else:
        mode = "inference"
    fields = (model, mode, ser["batch_size"], ser["steps"], ser["steps_per_loop"])
    # format(x) applies the same default formatting an f-string placeholder does.
    return "exp_" + "_".join(format(field) for field in fields)

from itertools import groupby
def all_eq(iterable):
    """Return True when every element of ``iterable`` equals its neighbour.

    ``groupby`` collapses consecutive equal elements into runs, so the
    elements are all equal exactly when there is at most one run. An empty
    iterable yields no runs and therefore also gives True.
    """
    runs = groupby(iterable)
    next(runs, None)  # skip the first run, if there is one
    # A second run exists only if some element differed from its predecessor.
    return next(runs, None) is None

In [11]:
from collections import defaultdict

# Number of rows (experiment configurations) read from each results table.
CONFIG_LEN = 24

# Metrics copied from every per-instance table into the side-by-side table.
AGGREGATED_METRICS = ('steps_per_second', 'total_time', 'total_cost')

# Fail early with a readable message instead of an out-of-bounds IndexError
# raised from .iloc somewhere in the middle of the loop below.
if not instance_names:
    raise ValueError('no instance results loaded; nothing to aggregate')
for instance_name in instance_names:
    n_rows = len(results_d[instance_name])
    if n_rows < CONFIG_LEN:
        raise ValueError(
            f'{instance_name}: expected at least {CONFIG_LEN} result rows, found {n_rows}'
        )

# Column name -> list of values, one entry per experiment row.
aggregate_data = defaultdict(list)
for i in range(CONFIG_LEN):
    # Row i of every instance's table, keyed by instance name.
    rows = {instance_name: results_d[instance_name].iloc[i] for instance_name in instance_names}
    exp_names = [get_exp_name(data_row) for data_row in rows.values()]
    # Row i must describe the same experiment on every instance; otherwise the
    # side-by-side columns would be comparing different workloads.
    assert all_eq(exp_names), f'row {i}: experiments differ across instances: {exp_names}'
    aggregate_data['exp_name'].append(exp_names[0])
    for instance_name, data_row in rows.items():
        for metric in AGGREGATED_METRICS:
            aggregate_data[f'{metric}_{instance_name}'].append(data_row[metric])

aggregate_df = pd.DataFrame(aggregate_data)
# Sort columns alphabetically so each metric's per-instance columns sit together.
aggregate_df = aggregate_df.reindex(sorted(aggregate_df.columns), axis=1)
aggregate_df

Unnamed: 0,exp_name,steps_per_second_g5.xlarge,steps_per_second_g6.xlarge,total_cost_g5.xlarge,total_cost_g6.xlarge,total_time_g5.xlarge,total_time_g6.xlarge
0,exp_resnet18_train_128_5000_100,52.622479,64.105677,0.026657,0.017332,95.016429,77.996212
1,exp_resnet18_train_64_5000_100,95.337982,113.380984,0.014714,0.0098,52.444995,44.099106
2,exp_resnet18_train_32_5000_100,104.56287,125.669095,0.013416,0.008842,47.818121,39.78703
3,exp_bert_base_train_8_5000_100,23.213842,19.521944,0.060429,0.056916,215.388736,256.122035
4,exp_bert_base_train_4_5000_100,24.365657,19.802074,0.057572,0.056111,205.206861,252.498798
5,exp_bert_base_train_2_5000_100,27.041615,20.004046,0.051875,0.055544,184.900196,249.94943
6,exp_retinanet_train_8_300_10,21.530674,18.390761,0.003909,0.003625,13.933609,16.312538
7,exp_retinanet_train_4_300_10,22.435045,18.723162,0.003752,0.003561,13.371936,16.022934
8,exp_retinanet_train_2_300_10,24.84128,18.744999,0.003388,0.003557,12.076672,16.004268
9,exp_maskrcnn_train_8_600_10,22.203013,19.024959,0.007582,0.007008,27.023359,31.537519


In [12]:
# Write the side-by-side comparison table next to the per-instance results;
# index=False keeps the positional row index out of the CSV.
aggregate_df.to_csv('./results/agg_exp_results.csv', index=False)