Skip to content

Commit

Permalink
adds timing information to graph (#522)
Browse files Browse the repository at this point in the history
* adds timing information for graph
  • Loading branch information
lanpa committed Oct 19, 2019
1 parent 370bb55 commit 55ab152
Showing 1 changed file with 42 additions and 5 deletions.
47 changes: 42 additions & 5 deletions tensorboardX/pytorch_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

methods_OP = ['attributeNames', 'hasMultipleOutputs', 'hasUses', 'inputs',
'kind', 'outputs', 'outputsSize', 'scopeName']
methods_IO = []
methods_IO = []
backward_compat_mode = False

class NodeBase(object):
Expand Down Expand Up @@ -122,6 +122,7 @@ def __init__(self):
self.unique_name_to_scoped_name = {}
self.shallowest_scope_name = 'default'
self.scope_name_appeared = []
self.profile_result = None

def append(self, x):
if isinstance(x, NodePyIO):
Expand Down Expand Up @@ -181,34 +182,57 @@ def to_proto(self):
import numpy as np
nodes = []
node_stats = []

if self.profile_result is not None:
profile_result = self.profile_result.function_events

_time_used_for_op = {}

# We assume that the model is executed sequentially, so the timing is taken from
# the first matching profiler event; that event is then removed with `pop()`.
def find_time_for(node_name):
for i, n in enumerate(profile_result):
if n.key == node_name:
profile_result.pop(i)
time_we_want = n.cpu_time_total
return int(time_we_want)

for v in self.nodes_io.values():
nodes.append(node_proto(v.debugName,
input=v.inputs,
outputsize=v.tensor_size,
op=v.kind,
attributes=v.attributes))

# For timing information, we are only interested in aten operators for now;
# prim:: and Parameter nodes do not get a timing entry.
if 'aten' in v.kind and self.profile_result is not None:
opname = v.kind.split('::')[1]
exe_time = find_time_for(opname)
node_stats.append(
NodeExecStats(node_name=v.debugName,
all_start_micros=int(time.time() * 1e7),
all_end_rel_micros=exe_time))

if v.tensor_size and len(v.tensor_size) > 0: # assume data is float32, only parameter is counted
node_stats.append(
NodeExecStats(node_name=v.debugName,
all_start_micros=int(time.time() * 1e7),
all_end_rel_micros=42,
memory=[AllocatorMemoryUsed(allocator_name="cpu",
total_bytes=int(np.prod(v.tensor_size)) * 4)]))

return nodes, node_stats


# one argument: 'hasAttribute', 'hasAttributes',
def parse(graph, args=None, omit_useless_nodes=True):
def parse(graph, args=None, profile_result=None):
"""This method parses an optimized PyTorch model graph and produces
a list of nodes and node stats for eventual conversion to TensorBoard
protobuf format.
Args:
graph (PyTorch module): The model to be parsed.
args (tuple): input tensor[s] for the model.
profile_result (torch.autograd.profiler.profile): Profiler output used to attach timing information to the nodes; None disables timing.
"""
import torch
n_inputs = len(args) # not sure...
Expand All @@ -222,6 +246,7 @@ def parse(graph, args=None, omit_useless_nodes=True):
backward_compat_mode = True

nodes_py = GraphPy()
nodes_py.profile_result = profile_result

for node in graph.inputs():
if node.debugName() == 'self':
Expand Down Expand Up @@ -287,9 +312,21 @@ def graph(model, args, verbose=False, **kwargs):
# The producer version has been reverse engineered from standard
# TensorBoard logged data.

try:
with torch.autograd.profiler.profile(record_shapes=True) as prof:
if len(args) == 1 and isinstance(args, tuple) or isinstance(args, list):
args = args[0]
result = model(args)
else:
result = model(*args)

except RuntimeError as e:
print('profiler execution failed')
prof = None

if verbose:
print(graph)
list_of_nodes, node_stats = parse(graph, args)
list_of_nodes, node_stats = parse(graph, args, prof)
# We are hardcoding that this was run on CPU even though it might have actually
# run on GPU. Note this is what is shown in TensorBoard and has no bearing
# on actual execution.
Expand Down

0 comments on commit 55ab152

Please sign in to comment.