# NOTE(review): SOURCE is a unified diff (test/test_profiler.py,
# torch/autograd/profiler.py, torch/profiler/profiler.py) whose line breaks
# were lost. Below is the Python that the patch adds or modifies, regrouped
# per target file. The enclosing class headers are outside the visible hunks,
# so the `*Patch` classes are namespaces for the patched methods only: merge
# the method bodies into the real classes instead of copying these shells.
import os
import tempfile


# --- test/test_profiler.py --------------------------------------------------
# The patch also adds `import tempfile` to this file's import block.
class ProfilerTestPatch:
    """Namespace for the test method added to test/test_profiler.py."""

    def test_export_stacks(self):
        """Profile a few CPU ops and validate the exported stacks file.

        Every exported line must end with a positive integer (the metric
        value) and the file must not be empty.
        """
        # `profile`, `kineto_available` and `torch` come from the test
        # module's own imports.
        with profile(with_stack=True, use_kineto=kineto_available()) as p:
            x = torch.randn(10, 10)
            y = torch.randn(10, 10)
            z = torch.mm(x, y)
            z = z + y

        # export_stacks() opens the path itself. Re-opening a still-open
        # NamedTemporaryFile by name is platform dependent (it fails on
        # Windows), so export into a throw-away directory instead.
        with tempfile.TemporaryDirectory() as tmp_dir:
            stacks_path = os.path.join(tmp_dir, "profiler.stacks")
            p.export_stacks(stacks_path)
            with open(stacks_path) as f:
                lines = f.readlines()

        assert len(lines) > 0, "Empty stacks file"
        for line in lines:
            try:
                value = int(line.split(" ")[-1])
            except ValueError:
                raise AssertionError("Invalid stacks record") from None
            assert value > 0, "Invalid stacks record"


# --- torch/autograd/profiler.py: methods added to EventList -----------------
class EventListPatch:
    """Namespace for the methods the patch adds to ``EventList``.

    ``export_stacks`` iterates ``self``, so the real class must be iterable
    over events exposing ``stack`` and the metric attributes.
    """

    def supported_export_stacks_metrics(self):
        """Return the metric names accepted by :meth:`export_stacks`."""
        return ["self_cpu_time_total", "self_cuda_time_total"]

    def export_stacks(self, path: str, metric: str):
        """Write one ``frame;frame;... value`` line per event to ``path``.

        Arguments:
            path: file to (over)write.
            metric: event attribute to report; must be one of
                :meth:`supported_export_stacks_metrics`.

        Raises:
            ValueError: if ``metric`` is not supported (raised before the
                file is opened).

        Events with an empty/missing stack, or whose metric truncates to a
        value <= 0, are skipped.
        """
        supported = self.supported_export_stacks_metrics()
        if metric not in supported:
            raise ValueError("metric should be one of: " + str(supported))
        # ' ' and ';' are the field/frame separators of the output format, so
        # they (and tab/newline) are replaced inside frame names.
        translate_table = str.maketrans(" ;\t\n", "____")
        with open(path, 'w') as f:
            for evt in self:
                if not evt.stack:
                    continue
                metric_value = int(getattr(evt, metric))
                if metric_value <= 0:
                    continue
                # evt.stack is walked in reverse, as in the original code.
                frames = ";".join(
                    entry.translate(translate_table)
                    for entry in reversed(evt.stack)
                )
                f.write(frames + " " + str(metric_value) + "\n")


# --- torch/autograd/profiler.py: methods of the profiler object -------------
class AutogradProfilePatch:
    """Namespace for the profiler-object methods touched by the patch."""

    def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
        """Export collected stacks via ``self.function_events.export_stacks``.

        Asserts that profiling results exist and that the profiler was
        created with ``with_stack=True``.
        """
        self._check_finish()
        assert self.function_events is not None, "Expected profiling results"
        assert self.with_stack, "export_stacks() requires with_stack=True"
        return self.function_events.export_stacks(path, metric)

    # No docstring here: the real file assigns it right after the method
    # (`key_averages.__doc__ = EventList.key_averages.__doc__`).
    def key_averages(self, group_by_input_shape=False, group_by_stack_n=0):
        self._check_finish()
        assert self.function_events is not None, "Expected profiling results"
        return self.function_events.key_averages(group_by_input_shape, group_by_stack_n)

    # Likewise: `total_average.__doc__ = EventList.total_average.__doc__`
    # follows this method in the real file.
    def total_average(self):
        self._check_finish()
        assert self.function_events is not None, "Expected profiling results"
        return self.function_events.total_average()


# --- torch/profiler/profiler.py ---------------------------------------------
class TorchProfilerPatch:
    """Namespace for the method added in torch/profiler/profiler.py."""

    def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
        """
        Save stack traces in a file in a format suitable for visualization.

        Arguments:

        - ``path`` - save stacks file to this location;
        - ``metric`` - metric to use: "self_cpu_time_total" or "self_cuda_time_total"

        .. note::
            Example of using FlameGraph tool:

            - git clone https://github.com/brendangregg/FlameGraph
            - cd FlameGraph
            - ./flamegraph.pl --title "CPU time" --countname "us." profiler.stacks > perf_viz.svg
        """
        assert self.profiler
        return self.profiler.export_stacks(path, metric)