Output stacks (support for SVG visualization) (#48438)

Summary: Pull Request resolved: #48438. Outputs stacks in a format suitable for SVG visualization (e.g. with the https://github.com/brendangregg/FlameGraph tool). Test Plan: `python test/test_profiler.py -k test_export_stacks`. Example output, e.g. for resnet18 (note: the actual SVG is interactive): <img width="1193" alt="Screen Shot 2020-11-24 at 7 06 27 PM" src="https://user-images.githubusercontent.com/30845429/100178160-397f3500-2e88-11eb-81c4-34b19c5fcb87.png"> Reviewed By: dzhulgakov. Differential Revision: D25174270. Pulled By: ilia-cher. fbshipit-source-id: 6b60084071b209441805c468f5ff777318e42d1a
pytorch · Dec 19, 2020 · 485aee7 · 485aee7
1 parent d0a12c5
commit 485aee7
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 2 deletions.
diff --git a/test/test_profiler.py b/test/test_profiler.py
@@ -2,6 +2,7 @@
 import gc
 import unittest
 
+import tempfile
 import torch
 import torch.nn as nn
 import torch.optim
@@ -281,6 +282,26 @@ def trace_handler(p):
         print(p.key_averages().table(
             sort_by="self_cuda_time_total", row_limit=-1))
 
+    def test_export_stacks(self):
+        with profile(with_stack=True, use_kineto=kineto_available()) as p:
+            x = torch.randn(10, 10)
+            y = torch.randn(10, 10)
+            z = torch.mm(x, y)
+            z = z + y
+
+        with tempfile.NamedTemporaryFile(mode="w+") as f:
+            p.export_stacks(f.name)
+            lines = f.readlines()
+            assert len(lines) > 0, "Empty stacks file"
+            for line in lines:
+                is_int = False
+                try:
+                    assert int(line.split(" ")[-1]) > 0, "Invalid stacks record"
+                    is_int = True
+                except ValueError:
+                    pass
+                assert is_int, "Invalid stacks record"
+
 
 if __name__ == '__main__':
     run_tests()
diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py
@@ -266,6 +266,25 @@ def export_chrome_trace(self, path):
             f.truncate()
             f.write("]")
 
+    def supported_export_stacks_metrics(self):
+        return ["self_cpu_time_total", "self_cuda_time_total"]
+
+    def export_stacks(self, path: str, metric: str):
+        if metric not in self.supported_export_stacks_metrics():
+            raise ValueError("metric should be one of: " + str(self.supported_export_stacks_metrics()))
+        translate_table = str.maketrans(" ;\t\n", "____")
+        with open(path, 'w') as f:
+            for evt in self:
+                if evt.stack and len(evt.stack) > 0:
+                    metric_value = getattr(evt, metric)
+                    if int(metric_value) > 0:
+                        stack_str = ""
+                        for entry in reversed(evt.stack):
+                            stack_str += entry.translate(translate_table)
+                            stack_str += ";"
+                        stack_str = stack_str[:-1] + " " + str(int(metric_value))
+                        f.write(stack_str + "\n")
+
     def key_averages(self, group_by_input_shapes=False, group_by_stack_n=0):
         """Averages all function events over their keys.
 
@@ -522,15 +541,21 @@ def export_chrome_trace(self, path):
             return self.function_events.export_chrome_trace(path)
     export_chrome_trace.__doc__ = EventList.export_chrome_trace.__doc__
 
+    def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
+        self._check_finish()
+        assert self.function_events is not None, "Expected profiling results"
+        assert self.with_stack, "export_stacks() requires with_stack=True"
+        return self.function_events.export_stacks(path, metric)
+
     def key_averages(self, group_by_input_shape=False, group_by_stack_n=0):
         self._check_finish()
-        assert self.function_events is not None
+        assert self.function_events is not None, "Expected profiling results"
         return self.function_events.key_averages(group_by_input_shape, group_by_stack_n)
     key_averages.__doc__ = EventList.key_averages.__doc__
 
     def total_average(self):
         self._check_finish()
-        assert self.function_events is not None
+        assert self.function_events is not None, "Expected profiling results"
         return self.function_events.total_average()
     total_average.__doc__ = EventList.total_average.__doc__
 

diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py
@@ -245,6 +245,25 @@ def export_chrome_trace(self, path: str):
         assert self.profiler
         return self.profiler.export_chrome_trace(path)
 
+    def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
+        """
+        Save stack traces in a file in a format suitable for visualization.
+
+        Arguments:
+
+        - ``path`` - save stacks file to this location;
+        - ``metric`` - metric to use: "self_cpu_time_total" or "self_cuda_time_total"
+
+        .. note::
+            Example of using FlameGraph tool:
+
+            - git clone https://github.com/brendangregg/FlameGraph
+            - cd FlameGraph
+            - ./flamegraph.pl --title "CPU time" --countname "us." profiler.stacks > perf_viz.svg
+        """
+        assert self.profiler
+        return self.profiler.export_stacks(path, metric)
+
     def key_averages(self, group_by_input_shape: bool = False, group_by_stack_n: int = 0):
         """
         Averages events, grouping them by operator name and (optionally) input shapes and