In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
from IPython.core.display import display, HTML
import matplotlib.pyplot as plt
import pandas as pd
import os, re

plt.rcParams['figure.figsize'] = [10, 5]
plt.rcParams['figure.max_open_warning'] = 50
display(HTML("<style>.container { width:100% !important; }</style>"))

sys.path.insert(0, os.getcwd() + "/../../")
from analysis.utils import PM_HOME, GPU_NAME
from analysis.trace_utils import *

In [5]:
# Iteration count. Within this notebook it is only referenced by the disabled
# trace-processing cell (trace trimming and per-batch runtime scaling).
iters = 10

# Every model is listed next to its smallest batch size; the pairs are then
# split into the two parallel lists that the other cells index by position.
model_names, batch_0_sizes = (
    list(column)
    for column in zip(
        ('DLRM_default', 512),
        ('DLRM_MLPerf', 512),
        ('DLRM_DDP', 512),
        ('resnet50', 16),
        ('inception_v3', 16),
        ('Transformer', 64),
    )
)

# How many batch sizes are covered per model; size i is batch_0_size * 2**i.
num_batch_sizes = 4

In [None]:
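# NOTE(review): this entire cell is disabled (commented out). It appears to derive
# per-model active/idle device-time fractions from profiler traces; the hard-coded
# UTILS table in the next cell was presumably copied from its printed output -- confirm.
# Things to check before re-enabling it:
#   * `json` is not imported explicitly in this notebook (it would have to come from
#     the `analysis.trace_utils` star import);
#   * the log-file path always uses `batch_0_sizes[idx] * 4`, while the trace-file path
#     follows the current batch size;
#   * the outer loop decrements `num_batch_sizes` down to 0, and later cells read it.
# while num_batch_sizes > 0: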
#     for idx, model_name in enumerate(model_names):
#         print(model_name)
#         module_marker = "DLRM " if "DLRM" in model_name else "## Forward ##"
#         trace_file = '{}/data/{}/e2e/{}/1_{}.json'.format(
#             PM_HOME, 
#             GPU_NAME, 
#             model_name, 
#             2 ** (4 - num_batch_sizes) * batch_0_sizes[idx]
#         )

#         trimmed_trace_file = trim_trace_by_num_iter(trace_file, iters=iters)
#         with open(trimmed_trace_file) as f:
#             trace = json.load(f)
            
#         roots, cc, corrected_start_time, corrected_end_time, sum_skipped_intervals = \
#             process_event_hierarchy(trace['traceEvents'], skip_module=False, module_marker=module_marker)
#         host_runtime = corrected_end_time - corrected_start_time - sum_skipped_intervals
#         device_runtime = host_runtime
#         ops = []
#         get_operators(roots, ops)

#         op_device_runtime = get_device_runtime(ops, cc) # dict: op ex_id -> all its device calls and stats
#         dt_breakdown = device_runtime_breakdown(roots, op_device_runtime, depth=0)
#         truncate_count = 10
#         flatten = {}
#         for stream, v in dt_breakdown.items():
#             flatten[stream] = {}
#             get_major_device_results(device_runtime, dt_breakdown[stream], flatten[stream])
        
#         for stream, v in flatten.items():
#             runtime_no_pf = -1
#             log_file = "{}/data/{}/e2e/{}/1_{}.log".format(PM_HOME, GPU_NAME, model_name, batch_0_sizes[idx] * 4)
#             if os.path.exists(log_file):
#                 for line in open(log_file, 'r'):
#                     if re.search("Overall per-batch", line):
#                         runtime_no_pf = float(line.split(' ')[4]) * 1000 * iters # us

#             per_op = {}
#             total = 0.0
#             for k, vv in v.items():
#                 if k == 'total' or 'DLRM ' in k[0] or 'module' in k[0]: # Skip all labels
#                     continue
#                 k0 = k[0] if '#' not in k[0] else k[0].split('#')[0]
#                 if k0 not in per_op.keys():
#                     per_op[k0] = 0.0
#                 per_op[k0] += vv['runtime']
#                 total += vv['runtime']

#             tmp = sorted(per_op.items(), key=lambda x: x[1], reverse=True)
#             op = [x[0] for x in tmp]
#             p = [x[1] / total for x in tmp]
#             df0 = pd.DataFrame({
#                 'Active time': [v['total']['runtime'] / runtime_no_pf],
#                 'Idle time': [1 - v['total']['runtime'] / runtime_no_pf]
#             })
#             print(df0)
#     num_batch_sizes -= 1

In [None]:
# Hard-coded "active" fractions: one row per entry of model_names (same order),
# one column per batch size (batch_0_size * 2**column).
UTILS = [
    [0.5383, 0.7027, 0.8234, 0.9107],  # DLRM_default
    [0.5388, 0.6577, 0.7997, 0.9073],  # DLRM_MLPerf
    [0.3326, 0.4272, 0.6063, 0.7457],  # DLRM_DDP
    [0.8129, 0.9572, 0.9839, 0.9889],  # resnet50
    [0.6753, 0.8500, 0.9723, 0.9829],  # inception_v3
    [0.8630, 0.9345, 0.9514, 0.9651],  # Transformer
]

# Build one DataFrame per model: rows are indexed by batch size, and the
# 'idle' column is the complement of the 'active' column.
dfs = {}
for idx, model in enumerate(model_names):
    steps = range(num_batch_sizes)
    active_fractions = [UTILS[idx][step] for step in steps]
    dfs[model] = pd.DataFrame(
        [[active, 1 - active] for active in active_fractions],
        columns=['active', 'idle'],
        index=[batch_0_sizes[idx] * (2**step) for step in steps],
    )

In [None]:
# Stacked bar chart of active vs. idle fraction: one group of bars per model,
# one bar per batch size. Saved as both PDF and PNG.

# Layout constants. Bar geometry is expressed in x-axis data units.
fontsize = 10    # base font size; tick labels use 1.5x, axis labels and model names 2x
bar_width = 8
bar_gap = 3      # horizontal gap between neighbouring bars of the same model
model_gap = 60   # distance between the centres of neighbouring model groups
xticks = []
xticklabels = []
yticks = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
yticklabels = ["{:.0f}%".format(x*100) for x in yticks]

# The figure size must be set BEFORE the figure is created. Setting it after
# plotting only affected later figures, so the saved image size depended on
# how many times this cell had been run.
plt.rcParams['figure.figsize'] = [20, 8]
fig, ax = plt.subplots()

# Enable the horizontal grid exactly once and explicitly. grid(axis='y') with no
# visibility argument toggles the grid, so calling it once per model made the
# result depend on the number of models.
ax.grid(True, axis='y')

for idx, (model_name, df) in enumerate(dfs.items()):
    # Take the bar count from the data instead of a hard-coded 4.
    num_bars = len(df)
    # Bar centres, spread symmetrically around the centre of this model's group.
    labels = [
        model_gap * (idx+1) + (bar_width + bar_gap) * i - (bar_width + bar_gap) * (num_bars - 1) / 2.0
        for i in range(num_bars)
    ]
    ax.bar(labels, df['active'], bar_width, color=[plt.get_cmap("Set2")(idx)] * num_bars)
    # The idle share is stacked on top of the active share.
    ax.bar(labels, df['idle'], bar_width, bottom=df['active'], color=['gray'] * num_bars)
    # for i, x in enumerate(labels):
    #     ax.text(x, df['active'].values[i]-0.04, "{:.2f}%".format(df['active'].values[i]*100), horizontalalignment='center', verticalalignment='center', size=fontsize)
    # Model name centred above its group (y is in data coordinates, just above 100%).
    ax.text(model_gap * (idx+1), 1.08, model_name, horizontalalignment='center', verticalalignment='center', size=fontsize*2)
    xticks.extend(labels)
    # Batch sizes come from the frame's own index, so tick labels always match
    # the plotted data regardless of dict iteration order.
    xticklabels.extend(df.index.tolist())

ax.set_xticks(xticks)
ax.set_xticklabels(xticklabels, fontsize=fontsize*1.5)
ax.set_yticks(yticks)
ax.set_yticklabels(yticklabels, fontsize=fontsize*1.5)
ax.set_xlabel("Batch Size", fontsize=fontsize*2)
ax.set_ylabel("Percentage", fontsize=fontsize*2)
fig.tight_layout()
fig.savefig('{}/data/{}/e2e/active_idle_split.pdf'.format(PM_HOME, GPU_NAME), bbox_inches='tight')
fig.savefig('{}/data/{}/e2e/active_idle_split.png'.format(PM_HOME, GPU_NAME), bbox_inches='tight')