In [1]:
import json
import numpy
from collections import OrderedDict
import sys

def load_hardware_trace_json(file):
    """
    Read a timeline .json trace file (described by the original author as
    output by Tensorflow/libcupti) and return its contents as an OrderedDict.

    Every JSON object in the file, at any nesting depth, is decoded into an
    OrderedDict, so keys keep the order in which they appear in the file.

    :param file: path of the .json file (anything accepted by ``open``).
    :return: OrderedDict built from the decoded top-level JSON value.
    """
    with open(file, mode='r') as f:
        # object_pairs_hook alone is enough: json ignores object_hook whenever
        # object_pairs_hook is also supplied, so passing both was redundant.
        output = json.load(f, object_pairs_hook=OrderedDict)

    # Wrap once more so the return value is an OrderedDict built from the
    # decoded top-level value, exactly as before.
    return OrderedDict(output)

def get_all_ops(trace_dic):
    """
    Collect every trace event whose category is 'Op'.

    Params:
    trace_dic: collections.OrderedDict (or other mapping) with a
        'traceEvents' entry holding an iterable of event dictionaries.
    Return: list of the event dictionaries whose 'cat' equals 'Op', in their
        original order. Events without a 'cat' key are skipped.
    Raises: SystemExit with status 1, after printing a message, when
        'traceEvents' is missing from trace_dic.
    """
    try:
        traceEvents = trace_dic['traceEvents']
    except KeyError:
        print('Not valid GPU trace dict object.')
        # Exit with a non-zero status: a bare sys.exit() reports success
        # (status 0) even though the input was rejected.
        sys.exit(1)
    # .get() yields None for events lacking 'cat', so they are filtered out
    # without needing a try/except around each lookup.
    return [trace for trace in traceEvents if trace.get('cat') == 'Op']

def get_unique_ops_names(all_ops):
    '''
    Collect the distinct values of the 'name' field across all ops.
    Params:
    all_ops: list of op dictionaries, each having a 'name' key.
    Return: set of unique op names.
    '''
    return {entry['name'] for entry in all_ops}

def get_wall_duration(op_names, all_ops, pid_list=[11, 7, 13, 15, 9]):
    '''
    Calculates wall duration for each op in op_names.
    Params:
    op_names: iterable (str), names of ops of interest.
    all_ops: output of get_all_ops(). Every op needs a 'name' key; ops whose
        name is in op_names also need 'pid' and 'dur' ('dur' is treated as
        microseconds by the us -> ms conversion below).
    pid_list: container of pid values to include. It is only read, never
        mutated, so the shared default list is safe.
    Return:
    (total wall duration in ms, OrderedDict[op name] = wall duration in ms).
    Names in op_names that match no op get a duration of 0.
    Side effect: prints the total wall duration.
    '''
    # Materialise once so that generators can be passed and iterated twice.
    names = list(op_names)

    # 1. Accumulate the raw duration of every requested op in a single pass.
    #    Only the op_names argument is consulted; no notebook-level variable
    #    is needed for the function to work.
    raw_dur = {name: 0 for name in names}
    for op in all_ops:
        name = op['name']
        if name in raw_dur and op['pid'] in pid_list:
            raw_dur[name] += op['dur']

    # 2. Convert to milliseconds, preserving the order of op_names.
    op_dict = OrderedDict()
    total_dur = 0
    for name in names:
        dur_ms = raw_dur[name] * 1e-3  # convert from us to ms
        op_dict[name] = dur_ms
        total_dur += dur_ms

    print('Wall Duration (ms): %4.3f' % total_dur)
    return total_dur, op_dict

In [2]:
# Load the timeline trace from 'timeline.ctf.3.json' (path is relative to the
# working directory) and keep only the events whose category is 'Op'
dic = load_hardware_trace_json('timeline.ctf.3.json')
all_ops = get_all_ops(dic)
# Find the names of unique ops (returned as a set) and print them
unique_op_names = get_unique_ops_names(all_ops)
print(unique_op_names)

{'_ParallelConcatStart', 'ConcatV2', 'ReluGrad', 'L2Loss', 'Mean', 'Conv2DBackpropInput', 'FusedBatchNormGradV2', 'Transpose', 'MEMCPYDtoD', 'Sqrt', 'Conv2D', '_Retval', 'HorovodAllreduce', 'ExpandDims', 'RealDiv', '_ParallelConcatUpdate', 'NoOp', 'AvgPoolGrad', 'Cast', 'Relu', 'Neg', 'MatMul', 'SquaredDifference', 'Assign', 'Square', 'MaxPool', 'Unstage', 'BiasAddGrad', 'VariableV2', 'RecordInput', 'Pow', 'Maximum', 'MaxPoolGrad', 'MEMCPYDtoH', 'unknown', 'FusedBatchNormV2', 'Squeeze', 'Floor', 'Sum', 'Mul', 'ShapeN', 'AssignSub', 'ApplyAdam', 'AddN', 'DecodeRaw', 'Split', 'MEMCPYHtoD', 'AssignAdd', 'Sub', 'Conv2DBackpropFilter', 'Stage', 'Identity', 'BiasAdd', 'AvgPool', 'Select', 'Const', 'ParseExample', 'Reshape', 'Add', 'Tile'}


In [3]:
# Calculate wall duration of all ops.
# No pid_list is passed, so only events whose pid is in get_wall_duration's
# default pid_list are counted. The call also prints the total.
total_dur, dur_dic = get_wall_duration(unique_op_names, all_ops)

Wall Duration (ms): 285.273


In [4]:
# Calculate wall duration of only forward analytical ops.
# NOTE: this rebinds total_dur and dur_dic, replacing the all-ops results
# computed in the previous cell.
analytical_ops = ['Add','BiasAdd','Conv2D','Mul', 'AssignAdd', 
                  'AvgPool', 'MaxPool', 'RealDiv']
total_dur, dur_dic = get_wall_duration(analytical_ops, all_ops)

Wall Duration (ms): 44.178
