Revert D25199264: Enable callgrind collection for C++ snippets
Test Plan: revert-hammer

Differential Revision: D25199264 (ff09729)

Original commit changeset: 529244054e4c

fbshipit-source-id: 7429d7154f92e097089bf51dc81042b766de9cc3
Mike Ruberry authored and facebook-github-bot committed Dec 2, 2020
1 parent 6299c87 commit 6646ff1
Showing 5 changed files with 25 additions and 127 deletions.
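For context, the change being reverted added a C++-language path to `Timer.collect_callgrind`; the Python-language path is untouched. Below is a minimal sketch of the Python usage that keeps working after this revert. The `stmt`/`setup` strings are illustrative, and Valgrind must be installed on a non-Windows host.

```python
# Minimal sketch of the Python-language Callgrind path, which this revert leaves
# in place. The stmt/setup strings are illustrative; requires Valgrind.
import torch.utils.benchmark as benchmark_utils

timer = benchmark_utils.Timer(
    stmt="x += 1",
    setup="import torch; x = torch.ones((1,))",
)

stats = timer.collect_callgrind()   # runs `stmt` under Valgrind/Callgrind in a subprocess
print(stats.counts(denoise=True))   # instruction count, with Python interpreter noise filtered
```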
26 changes: 0 additions & 26 deletions test/benchmark_utils/test_benchmark_utils.py
@@ -449,7 +449,6 @@ class MockCudaTimer(benchmark_utils.Timer):

@slowTest
@unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
@unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
def test_collect_callgrind(self):
with self.assertRaisesRegex(
ValueError,
@@ -506,31 +505,6 @@ def add_one(x):
"JIT'd bindings are only for back testing."
)

@slowTest
@unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
@unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
def test_collect_cpp_callgrind(self):
timer = benchmark_utils.Timer(
"x += 1;",
setup="torch::Tensor x = torch::ones({1});",
language="c++",
)
stats = [
timer.collect_callgrind()
for _ in range(3)
]
counts = [s.counts() for s in stats]

self.assertGreater(
min(counts), 0, "No stats were collected")
self.assertEqual(
min(counts), max(counts), "C++ Callgrind should be deterministic")

for s in stats:
self.assertEqual(
s.counts(denoise=True), s.counts(denoise=False),
"De-noising should not apply to C++.")

def test_manipulate_callgrind_stats(self):
stats_no_data, stats_with_data = load_callgrind_artifacts()

15 changes: 2 additions & 13 deletions torch/utils/benchmark/utils/cpp_jit.py
@@ -3,7 +3,6 @@
import os
import re
import shutil
import tempfile
import textwrap
import threading
import uuid
@@ -31,8 +30,8 @@
# `setup` and `stmt` do not change, so we can reuse the executable from the
# first pass through the loop.
BUILD_ROOT = os.path.join(
tempfile.gettempdir(),
f"benchmark_utils_jit_build_{uuid.uuid4()}".replace("-", "")
torch._appdirs.user_cache_dir(appname="benchmark_utils_jit"),
f"build_{uuid.uuid4()}".replace("-", "")
)

# BACK_TESTING_NOTE:
@@ -142,13 +141,3 @@ def compile_timeit_template(stmt: str, setup: str) -> TimeitModuleType:
module = _compile_template(stmt, setup, src, is_standalone=False)
assert isinstance(module, TimeitModuleType)
return module


def compile_callgrind_template(stmt: str, setup: str) -> str:
template_path: str = os.path.join(SOURCE_ROOT, "valgrind_wrapper", "timer_callgrind_template.cpp")
with open(template_path, "rt") as f:
src: str = f.read()

target = _compile_template(stmt, setup, src, is_standalone=True)
assert isinstance(target, str)
return target
8 changes: 4 additions & 4 deletions torch/utils/benchmark/utils/timer.py
@@ -421,15 +421,15 @@ def collect_callgrind(
if not isinstance(self._task_spec.stmt, str):
raise ValueError("`collect_callgrind` currently only supports string `stmt`")

if self._language != Language.PYTHON:
raise NotImplementedError("C++ Callgrind is later in the stack.")

# Check that the statement is valid. It doesn't guarantee success, but it's much
# simpler and quicker to raise an exception for a faulty `stmt` or `setup` in
# the parent process rather than the valgrind subprocess.
self._timer.timeit(1)
is_python = (self._language == Language.PYTHON)
assert is_python or not self._globals
return valgrind_timer_interface.wrapper_singleton().collect_callgrind(
task_spec=self._task_spec,
globals=self._globals,
number=number,
collect_baseline=collect_baseline and is_python,
is_python=is_python)
collect_baseline=collect_baseline)
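With the guard restored in this hunk, a C++-language `Timer` (constructed as in the deleted `test_collect_cpp_callgrind` above) is rejected before any Valgrind work starts. A sketch of the post-revert behavior:

```python
# Sketch of the behavior restored by this hunk: the C++ language path is refused
# up front. The Timer construction mirrors the deleted test_collect_cpp_callgrind.
import torch.utils.benchmark as benchmark_utils

timer = benchmark_utils.Timer(
    "x += 1;",
    setup="torch::Tensor x = torch::ones({1});",
    language="c++",
)

try:
    timer.collect_callgrind()
except NotImplementedError as err:
    print(err)  # "C++ Callgrind is later in the stack."
```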

47 changes: 0 additions & 47 deletions torch/utils/benchmark/utils/valgrind_wrapper/timer_callgrind_template.cpp
This file was deleted.

56 changes: 19 additions & 37 deletions torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
@@ -483,13 +483,10 @@ def collect_callgrind(
task_spec: common.TaskSpec,
globals: Dict[str, Any],
number: int,
collect_baseline: bool,
is_python: bool,
collect_baseline: bool
) -> CallgrindStats:
"""Collect stats, and attach a reference run which can be used to filter interpreter overhead."""
self._validate()
assert is_python or not collect_baseline

baseline_inclusive_stats = FunctionCounts((), inclusive=True)
baseline_exclusive_stats = FunctionCounts((), inclusive=False)
if collect_baseline:
@@ -499,17 +496,15 @@ def collect_callgrind(
common.TaskSpec(
stmt="pass",
setup="pass",
num_threads=task_spec.num_threads,
num_threads=task_spec.num_threads
),
globals={},
number=number,
is_python=True,
)
baseline_inclusive_stats, baseline_exclusive_stats = \
self._baseline_cache[cache_key]

stmt_inclusive_stats, stmt_exclusive_stats = self._invoke(
task_spec, globals, number, is_python)
stmt_inclusive_stats, stmt_exclusive_stats = self._invoke(task_spec, globals, number)
return CallgrindStats(
task_spec=task_spec,
number_per_run=number,
@@ -525,7 +520,6 @@ def _invoke(
task_spec: common.TaskSpec,
globals: Dict[str, Any],
number: int,
is_python: bool,
) -> Tuple[FunctionCounts, FunctionCounts]:
"""Core invocation method for Callgrind collection.
@@ -571,34 +565,20 @@ def run(args: List[str], **kwargs: Any) -> Tuple[CompletedProcessType, str]:
f_stdout_stderr.close()

try:
if is_python:
if self._bindings_module is not None:
shutil.copy(
self._bindings_module.__file__,
os.path.join(working_dir, os.path.split(self._bindings_module.__file__)[1])
)

script_file = os.path.join(working_dir, "timer_callgrind.py")
with open(script_file, "wt") as f:
f.write(self._construct_script(
task_spec,
globals=GlobalsBridge(globals, data_dir),
number=number,
error_log=error_log,
stat_log=stat_log,
bindings=self._bindings_module))
run_loop_cmd = ["python", script_file]
else:
run_loop_exec = cpp_jit.compile_callgrind_template(
task_spec.stmt,
task_spec.setup,
if self._bindings_module is not None:
shutil.copy(
self._bindings_module.__file__,
os.path.join(working_dir, os.path.split(self._bindings_module.__file__)[1])
)
run_loop_cmd = [
run_loop_exec,
"--number", str(number),
"--number_warmup", str(min(number, 10)),
"--number_threads", str(task_spec.num_threads),
]

with open(script_file, "wt") as f:
f.write(self._construct_script(
task_spec,
globals=GlobalsBridge(globals, data_dir),
number=number,
error_log=error_log,
stat_log=stat_log,
bindings=self._bindings_module))

valgrind_invocation, valgrind_invocation_output = run([
"valgrind",
@@ -608,7 +588,9 @@ def run(args: List[str], **kwargs: Any) -> Tuple[CompletedProcessType, str]:
"--dump-instr=yes",
"--instr-atstart=yes",
"--collect-atstart=no",
] + run_loop_cmd)
"python",
script_file,
])

if valgrind_invocation.returncode:
error_report = ""
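For reference, a standalone sketch of the Valgrind command line the restored code path issues for the generated `timer_callgrind.py` script. The `--tool=callgrind` flag sits in the collapsed part of the hunk and is assumed here; the remaining flags appear verbatim above.

```python
# Standalone sketch (not a helper from timer_interface.py) of the restored
# invocation: Callgrind starts with collection disabled (--collect-atstart=no)
# and the generated script toggles collection around the measured statements.
import subprocess
from typing import List

def callgrind_cmd(script_file: str) -> List[str]:
    return [
        "valgrind",
        "--tool=callgrind",   # assumed; not visible in the hunk above
        "--dump-instr=yes",
        "--instr-atstart=yes",
        "--collect-atstart=no",
        "python",
        script_file,
    ]

# Example (requires Valgrind, non-Windows):
# completed = subprocess.run(callgrind_cmd("timer_callgrind.py"), capture_output=True, text=True)
```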
