Update on "Use CapturedTraceback symbolizer for C++ exceptions from P…

…ython library" This is the cheap and cheerful implementation, which is only enabled on TORCH_SHOW_CPP_STACKTRACES, because it *eagerly* symbolizes immediately at exception throw time, even if the exception will end up getting caught. It would be better to do this lazily and only symbolize when we try to print the exception, but that requires a more involved refactor of c10::Error that I don't feel like doing. Compare the output before: ``` frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x95 (0x7fa21b99d975 in /data/users/ezyang/c/pytorch/torch/lib/libc10.so) frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x8d (0x7fa21b951269 in /data/users/ezyang/c/pytorch/torch/lib/libc10.so) frame #2: c10::TensorImpl::sizes_custom() const + 0x9f (0x7fa21b9770df in /data/users/ezyang/c/pytorch/torch/lib/libc10.so) frame #3: at::meta::structured_mm::meta(at::Tensor const&, at::Tensor const&) + 0x31e (0x7fa20a202a8e in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so) frame #4: <unknown function> + 0x29f34de (0x7fa20b5f34de in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so) frame #5: <unknown function> + 0x2a1fd8e (0x7fa20b61fd8e in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so) frame #6: <unknown function> + 0x6b907b (0x7fa2142b907b in /data/users/ezyang/c/pytorch/torch/lib/libtorch_python.so) frame #7: <unknown function> + 0x6b6175 (0x7fa2142b6175 in /data/users/ezyang/c/pytorch/torch/lib/libtorch_python.so) ``` and after: ``` #1 torch::CapturedTraceback::gather(bool, bool, bool) from ??:0 #2 THPModule_initExtension(_object*, _object*)::{lambda()#1}::operator()() const [clone .constprop.0] from Module.cpp:0 #3 std::_Function_handler<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > (), THPModule_initExtension(_object*, _object*)::{lambda()#1}>::_M_invoke(std::_Any_data const&) from Module.cpp:0 #4 c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) from ??:0 #5 c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const from ??:0 #6 c10::TensorImpl::sizes_custom() const [clone .localalias] from TensorImpl.cpp:0 #7 at::meta::structured_mm::meta(at::Tensor const&, at::Tensor const&) from ??:0 #8 at::(anonymous namespace)::wrapper_Meta_mm_out_out(at::Tensor const&, at::Tensor const&, at::Tensor&) from RegisterMeta.cpp:0 #9 c10::impl::make_boxed_from_unboxed_functor<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer<at::Tensor& (at::Tensor const&, at::Tensor const&, at::Tensor&), &at::(anonymous namespace)::wrapper_Meta_mm_out_out>, at::Tensor&, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, at::Tensor&> >, false>::call(c10::OperatorKernel*, c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) from RegisterMeta.cpp:0 ``` Signed-off-by: Edward Z. Yang <ezyangmeta.com> [ghstack-poisoned]
pytorch · Nov 8, 2023 · 0a83847 · 0a83847
1 parent e1c3aab
commit 0a83847
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 3 deletions.
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
@@ -145,11 +145,11 @@ static PyObject* THPModule_initExtension(
     PyObject* _unused,
     PyObject* shm_manager_path) {
   HANDLE_TH_ERRORS
-  if (torch::get_cpp_stacktraces_enabled()) {
+  if (torch::get_cpp_stacktraces_enabled() && !torch::get_disable_addr2line()) {
     c10::SetStackTraceFetcher([]() -> std::string {
       auto tb = torch::CapturedTraceback::gather(false, false, true);
       LOG(WARNING)
-          << "symbolizing C++ stack trace for exception, this may take a while..."
+          << "symbolizing C++ stack trace for exception; if this hangs, rerun with TORCH_DISABLE_ADDR2LINE=1..."
           << std::endl;
       auto s_tbs = torch::symbolize({tb.get()});
       std::stringstream oss;

diff --git a/torch/csrc/utils/cpp_stacktraces.cpp b/torch/csrc/utils/cpp_stacktraces.cpp
@@ -23,10 +23,33 @@ bool compute_cpp_stack_traces_enabled() {
   }
   return false;
 }
+
+bool compute_disable_addr2line() {
+  auto envar = std::getenv("TORCH_DISABLE_ADDR2LINE");
+  if (envar) {
+    if (strcmp(envar, "0") == 0) {
+      return false;
+    }
+    if (strcmp(envar, "1") == 0) {
+      return true;
+    }
+    TORCH_WARN(
+        "ignoring invalid value for TORCH_DISABLE_ADDR2LINE: ",
+        envar,
+        " valid values are 0 or 1.");
+  }
+  return false;
+}
 } // namespace
 
 bool get_cpp_stacktraces_enabled() {
   static bool enabled = compute_cpp_stack_traces_enabled();
   return enabled;
 }
+
+bool get_disable_addr2line() {
+  static bool disabled = compute_disable_addr2line();
+  return disabled;
+}
+
 } // namespace torch
diff --git a/torch/csrc/utils/cpp_stacktraces.h b/torch/csrc/utils/cpp_stacktraces.h
@@ -4,4 +4,5 @@
 
 namespace torch {
 TORCH_API bool get_cpp_stacktraces_enabled();
-}
+TORCH_API bool get_disable_addr2line();
+} // namespace torch