Revert D25199264: Enable callgrind collection for C++ snippets
Test Plan: revert-hammer

Differential Revision: D25199264 (ff09729)

Original commit changeset: 529244054e4c

fbshipit-source-id: 7429d7154f92e097089bf51dc81042b766de9cc3
Mike Ruberry authored and facebook-github-bot committed Dec 2, 2020
1 parent 6299c87 commit 6646ff1
Showing 5 changed files with 25 additions and 127 deletions.
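For context, the change being reverted added a C++-language path to `Timer.collect_callgrind`; the Python-language path is untouched. Below is a minimal sketch of the Python usage that keeps working after this revert. The `stmt`/`setup` strings are illustrative, and Valgrind must be installed on a non-Windows host.

```python
# Minimal sketch of the Python-language Callgrind path, which this revert leaves
# in place. The stmt/setup strings are illustrative; requires Valgrind.
import torch.utils.benchmark as benchmark_utils

timer = benchmark_utils.Timer(
    stmt="x += 1",
    setup="import torch; x = torch.ones((1,))",
)

stats = timer.collect_callgrind()   # runs `stmt` under Valgrind/Callgrind in a subprocess
print(stats.counts(denoise=True))   # instruction count, with Python interpreter noise filtered
```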
26 changes: 0 additions & 26 deletions test/benchmark_utils/test_benchmark_utils.py
@@ -449,7 +449,6 @@ class MockCudaTimer(benchmark_utils.Timer):

@slowTest
@unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
@unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
def test_collect_callgrind(self):
with self.assertRaisesRegex(
ValueError,
@@ -506,31 +505,6 @@ def add_one(x):
"JIT'd bindings are only for back testing."
)

@slowTest
@unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
@unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
def test_collect_cpp_callgrind(self):
timer = benchmark_utils.Timer(
"x += 1;",
setup="torch::Tensor x = torch::ones({1});",
language="c++",
)
stats = [
timer.collect_callgrind()
for _ in range(3)
]
counts = [s.counts() for s in stats]

self.assertGreater(
min(counts), 0, "No stats were collected")
self.assertEqual(
min(counts), max(counts), "C++ Callgrind should be deterministic")

for s in stats:
self.assertEqual(
s.counts(denoise=True), s.counts(denoise=False),
"De-noising should not apply to C++.")

def test_manipulate_callgrind_stats(self):
stats_no_data, stats_with_data = load_callgrind_artifacts()

15 changes: 2 additions & 13 deletions torch/utils/benchmark/utils/cpp_jit.py
@@ -3,7 +3,6 @@
import os
import re
import shutil
import tempfile
import textwrap
import threading
import uuid
@@ -31,8 +30,8 @@
# `setup` and `stmt` do not change, so we can reuse the executable from the
# first pass through the loop.
BUILD_ROOT = os.path.join(
tempfile.gettempdir(),
f"benchmark_utils_jit_build_{uuid.uuid4()}".replace("-", "")
torch._appdirs.user_cache_dir(appname="benchmark_utils_jit"),
f"build_{uuid.uuid4()}".replace("-", "")
)

# BACK_TESTING_NOTE:
@@ -142,13 +141,3 @@ def compile_timeit_template(stmt: str, setup: str) -> TimeitModuleType:
module = _compile_template(stmt, setup, src, is_standalone=False)
assert isinstance(module, TimeitModuleType)
return module


def compile_callgrind_template(stmt: str, setup: str) -> str:
template_path: str = os.path.join(SOURCE_ROOT, "valgrind_wrapper", "timer_callgrind_template.cpp")
with open(template_path, "rt") as f:
src: str = f.read()

target = _compile_template(stmt, setup, src, is_standalone=True)
assert isinstance(target, str)
return target
8 changes: 4 additions & 4 deletions torch/utils/benchmark/utils/timer.py
@@ -421,15 +421,15 @@ def collect_callgrind(
if not isinstance(self._task_spec.stmt, str):
raise ValueError("`collect_callgrind` currently only supports string `stmt`")

if self._language != Language.PYTHON:
raise NotImplementedError("C++ Callgrind is later in the stack.")

# Check that the statement is valid. It doesn't guarantee success, but it's much
# simpler and quicker to raise an exception for a faulty `stmt` or `setup` in
# the parent process rather than the valgrind subprocess.
self._timer.timeit(1)
is_python = (self._language == Language.PYTHON)
assert is_python or not self._globals
return valgrind_timer_interface.wrapper_singleton().collect_callgrind(
task_spec=self._task_spec,
globals=self._globals,
number=number,
collect_baseline=collect_baseline and is_python,
is_python=is_python)
collect_baseline=collect_baseline)
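With the guard restored in this hunk, a C++-language `Timer` (constructed as in the deleted `test_collect_cpp_callgrind` above) is rejected before any Valgrind work starts. A sketch of the post-revert behavior:

```python
# Sketch of the behavior restored by this hunk: the C++ language path is refused
# up front. The Timer construction mirrors the deleted test_collect_cpp_callgrind.
import torch.utils.benchmark as benchmark_utils

timer = benchmark_utils.Timer(
    "x += 1;",
    setup="torch::Tensor x = torch::ones({1});",
    language="c++",
)

try:
    timer.collect_callgrind()
except NotImplementedError as err:
    print(err)  # "C++ Callgrind is later in the stack."
```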

47 changes: 0 additions & 47 deletions torch/utils/benchmark/utils/valgrind_wrapper/timer_callgrind_template.cpp
This file was deleted.

56 changes: 19 additions & 37 deletions torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
@@ -483,13 +483,10 @@ def collect_callgrind(
task_spec: common.TaskSpec,
globals: Dict[str, Any],
number: int,
collect_baseline: bool,
is_python: bool,
collect_baseline: bool
) -> CallgrindStats:
"""Collect stats, and attach a reference run which can be used to filter interpreter overhead."""
self._validate()
assert is_python or not collect_baseline

baseline_inclusive_stats = FunctionCounts((), inclusive=True)
baseline_exclusive_stats = FunctionCounts((), inclusive=False)
if collect_baseline:
@@ -499,17 +496,15 @@ def collect_callgrind(
common.TaskSpec(
stmt="pass",
setup="pass",
num_threads=task_spec.num_threads,
num_threads=task_spec.num_threads
),
globals={},
number=number,
is_python=True,
)
baseline_inclusive_stats, baseline_exclusive_stats = \
self._baseline_cache[cache_key]

stmt_inclusive_stats, stmt_exclusive_stats = self._invoke(
task_spec, globals, number, is_python)
stmt_inclusive_stats, stmt_exclusive_stats = self._invoke(task_spec, globals, number)
return CallgrindStats(
task_spec=task_spec,
number_per_run=number,
@@ -525,7 +520,6 @@ def _invoke(
task_spec: common.TaskSpec,
globals: Dict[str, Any],
number: int,
is_python: bool,
) -> Tuple[FunctionCounts, FunctionCounts]:
"""Core invocation method for Callgrind collection.
@@ -571,34 +565,20 @@ def run(args: List[str], **kwargs: Any) -> Tuple[CompletedProcessType, str]:
f_stdout_stderr.close()

try:
if is_python:
if self._bindings_module is not None:
shutil.copy(
self._bindings_module.__file__,
os.path.join(working_dir, os.path.split(self._bindings_module.__file__)[1])
)

script_file = os.path.join(working_dir, "timer_callgrind.py")
with open(script_file, "wt") as f:
f.write(self._construct_script(
task_spec,
globals=GlobalsBridge(globals, data_dir),
number=number,
error_log=error_log,
stat_log=stat_log,
bindings=self._bindings_module))
run_loop_cmd = ["python", script_file]
else:
run_loop_exec = cpp_jit.compile_callgrind_template(
task_spec.stmt,
task_spec.setup,
if self._bindings_module is not None:
shutil.copy(
self._bindings_module.__file__,
os.path.join(working_dir, os.path.split(self._bindings_module.__file__)[1])
)
run_loop_cmd = [
run_loop_exec,
"--number", str(number),
"--number_warmup", str(min(number, 10)),
"--number_threads", str(task_spec.num_threads),
]

with open(script_file, "wt") as f:
f.write(self._construct_script(
task_spec,
globals=GlobalsBridge(globals, data_dir),
number=number,
error_log=error_log,
stat_log=stat_log,
bindings=self._bindings_module))

valgrind_invocation, valgrind_invocation_output = run([
"valgrind",
@@ -608,7 +588,9 @@ def run(args: List[str], **kwargs: Any) -> Tuple[CompletedProcessType, str]:
"--dump-instr=yes",
"--instr-atstart=yes",
"--collect-atstart=no",
] + run_loop_cmd)
"python",
script_file,
])

if valgrind_invocation.returncode:
error_report = ""
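For reference, a standalone sketch of the Valgrind command line the restored code path issues for the generated `timer_callgrind.py` script. The `--tool=callgrind` flag sits in the collapsed part of the hunk and is assumed here; the remaining flags appear verbatim above.

```python
# Standalone sketch (not a helper from timer_interface.py) of the restored
# invocation: Callgrind starts with collection disabled (--collect-atstart=no)
# and the generated script toggles collection around the measured statements.
import subprocess
from typing import List

def callgrind_cmd(script_file: str) -> List[str]:
    return [
        "valgrind",
        "--tool=callgrind",   # assumed; not visible in the hunk above
        "--dump-instr=yes",
        "--instr-atstart=yes",
        "--collect-atstart=no",
        "python",
        script_file,
    ]

# Example (requires Valgrind, non-Windows):
# completed = subprocess.run(callgrind_cmd("timer_callgrind.py"), capture_output=True, text=True)
```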
