[mlir][python] Cache import of ir module in type casters. #160000

hawkinsp · 2025-09-21T19:21:13Z

In a JAX benchmark that traces a large language model, this change reduces the time spent in nanobind::module_::import_ from 1.2s to 10ms.

llvmbot · 2025-09-21T19:21:43Z

@llvm/pr-subscribers-mlir

Author: Peter Hawkins (hawkinsp)

Changes

In a JAX benchmark that traces a large language model, this change reduces the time spent in nanobind::module_::import_ from 1.2s to 10ms.

Full diff: https://github.com/llvm/llvm-project/pull/160000.diff

1 Files Affected:

(modified) mlir/include/mlir/Bindings/Python/NanobindAdaptors.h (+70-29)

diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index 8744d8d0e4bca..aeb51542f9b6d 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -19,7 +19,9 @@
 #ifndef MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H
 #define MLIR_BINDINGS_PYTHON_NANOBINDADAPTORS_H
 
+#include <atomic>
 #include <cstdint>
+#include <memory>
 #include <optional>
 
 #include "mlir-c/Diagnostics.h"
@@ -30,6 +32,56 @@
 // clang-format on
 #include "llvm/ADT/Twine.h"
 
+namespace mlir {
+namespace python {
+namespace {
+
+// Caches the result of Python init code run on first Get(), avoiding deadlocks.
+template <typename T> class SafeInit {
+public:
+  typedef std::unique_ptr<T> (*F)(); // Initializer: builds a fresh T.
+
+  explicit SafeInit(F init_fn) : init_fn_(init_fn) {}
+
+  T &Get() {
+    if (T *result = output_.load()) { // Fast path: a value is already stored.
+      return *result;
+    }
+
+    // Slow path: nothing is cached yet. init_fn_() runs without mu_ held, so
+    // several callers may each run it (e.g. if the GIL is released while it
+    // executes); only the first result stored below is kept, the rest are
+    // destroyed. Intended for idempotent work such as module imports.
+    std::unique_ptr<T> m = init_fn_();
+    {
+      nanobind::ft_lock_guard lock(mu_);
+      if (T *result = output_.load()) { // Someone else stored first; drop m.
+        return *result;
+      }
+      T *p = m.release(); // Ownership moves to output_; SafeInit never deletes it.
+      output_.store(p);
+      return *p;
+    }
+  }
+
+private:
+  nanobind::ft_mutex mu_; // Guards the check-and-store of output_ in Get().
+  std::atomic<T *> output_{nullptr}; // Null until the first result is stored.
+  F init_fn_;
+};
+
+nanobind::module_ &IrModule() { // Returns the cached MLIR "ir" Python module.
+  static SafeInit<nanobind::module_> init([]() { // NOTE(review): anonymous namespace in a header, so each including TU gets its own cache - confirm intended.
+    return std::make_unique<nanobind::module_>(
+        nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir")));
+  });
+  return init.Get(); // Imports on first use; later calls reuse the stored module.
+}
+
+} // namespace
+} // namespace python
+} // namespace mlir
+
 // Raw CAPI type casters need to be declared before use, so always include them
 // first.
 namespace nanobind {
@@ -75,7 +127,7 @@ struct type_caster<MlirAffineMap> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonAffineMapToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("AffineMap")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -97,7 +149,7 @@ struct type_caster<MlirAttribute> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonAttributeToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Attribute")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
@@ -128,9 +180,7 @@ struct type_caster<MlirContext> {
       // TODO: This raises an error of "No current context" currently.
       // Update the implementation to pretty-print the helpful error that the
       // core implementations print in this case.
-      src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-                .attr("Context")
-                .attr("current");
+      src = mlir::python::IrModule().attr("Context").attr("current");
     }
     std::optional<nanobind::object> capsule = mlirApiObjectToCapsule(src);
     value = mlirPythonCapsuleToContext(capsule->ptr());
@@ -153,7 +203,7 @@ struct type_caster<MlirDialectRegistry> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule = nanobind::steal<nanobind::object>(
         mlirPythonDialectRegistryToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("DialectRegistry")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -167,9 +217,7 @@ struct type_caster<MlirLocation> {
   bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
     if (src.is_none()) {
       // Gets the current thread-bound context.
-      src = nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-                .attr("Location")
-                .attr("current");
+      src = mlir::python::IrModule().attr("Location").attr("current");
     }
     if (auto capsule = mlirApiObjectToCapsule(src)) {
       value = mlirPythonCapsuleToLocation(capsule->ptr());
@@ -181,7 +229,7 @@ struct type_caster<MlirLocation> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonLocationToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Location")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -203,7 +251,7 @@ struct type_caster<MlirModule> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonModuleToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Module")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -250,7 +298,7 @@ struct type_caster<MlirOperation> {
       return nanobind::none();
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonOperationToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Operation")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -274,7 +322,7 @@ struct type_caster<MlirValue> {
       return nanobind::none();
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonValueToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Value")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
@@ -312,7 +360,7 @@ struct type_caster<MlirTypeID> {
       return nanobind::none();
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonTypeIDToCapsule(v));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("TypeID")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .release();
@@ -334,7 +382,7 @@ struct type_caster<MlirType> {
                          cleanup_list *cleanup) noexcept {
     nanobind::object capsule =
         nanobind::steal<nanobind::object>(mlirPythonTypeToCapsule(t));
-    return nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
+    return mlir::python::IrModule()
         .attr("Type")
         .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
         .attr(MLIR_PYTHON_MAYBE_DOWNCAST_ATTR)()
@@ -453,11 +501,9 @@ class mlir_attribute_subclass : public pure_subclass {
   mlir_attribute_subclass(nanobind::handle scope, const char *attrClassName,
                           IsAFunctionTy isaFunction,
                           GetTypeIDFunctionTy getTypeIDFunction = nullptr)
-      : mlir_attribute_subclass(
-            scope, attrClassName, isaFunction,
-            nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-                .attr("Attribute"),
-            getTypeIDFunction) {}
+      : mlir_attribute_subclass(scope, attrClassName, isaFunction,
+                                IrModule().attr("Attribute"),
+                                getTypeIDFunction) {}
 
   /// Subclasses with a provided mlir.ir.Attribute super-class. This must
   /// be used if the subclass is being defined in the same extension module
@@ -540,11 +586,8 @@ class mlir_type_subclass : public pure_subclass {
   mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
                      IsAFunctionTy isaFunction,
                      GetTypeIDFunctionTy getTypeIDFunction = nullptr)
-      : mlir_type_subclass(
-            scope, typeClassName, isaFunction,
-            nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-                .attr("Type"),
-            getTypeIDFunction) {}
+      : mlir_type_subclass(scope, typeClassName, isaFunction,
+                           IrModule().attr("Type"), getTypeIDFunction) {}
 
   /// Subclasses with a provided mlir.ir.Type super-class. This must
   /// be used if the subclass is being defined in the same extension module
@@ -631,10 +674,8 @@ class mlir_value_subclass : public pure_subclass {
   /// Subclasses by looking up the super-class dynamically.
   mlir_value_subclass(nanobind::handle scope, const char *valueClassName,
                       IsAFunctionTy isaFunction)
-      : mlir_value_subclass(
-            scope, valueClassName, isaFunction,
-            nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-                .attr("Value")) {}
+      : mlir_value_subclass(scope, valueClassName, isaFunction,
+                            IrModule().attr("Value")) {}
 
   /// Subclasses with a provided mlir.ir.Value super-class. This must
   /// be used if the subclass is being defined in the same extension module

github-actions · 2025-09-21T19:24:21Z

✅ With the latest revision this PR passed the C/C++ code formatter.

ftynse

LGTM modulo LLVM style nits

mlir/include/mlir/Bindings/Python/NanobindAdaptors.h

In a JAX benchmark that traces a large language model, this change reduces the time spent in nanobind::module_::import_ from 1.2s to 10ms.

hawkinsp · 2025-09-22T12:48:52Z

All done. Would you please merge? Thanks!

wjakob · 2025-10-13T09:40:19Z

@hawkinsp I am trying to understand the issue that required this commit. Shouldn't importing a module basically be a no-op once it's imported? Are there things that should be changed in nanobind?

hawkinsp · 2025-10-13T15:42:39Z

@wjakob All that is going on here is these type casters are called many times, perhaps 10^6 times or more in the benchmark.

Here's a CPU flame graph from a sampling profiler under CPython 3.12 that shows the problem (not the exact same benchmark as the original, hence the different timing):

It might make sense for nanobind to cache imports perhaps, or to simply make the CPython import logic faster in the case that we repeatedly make the same import.

wjakob · 2025-10-14T08:20:44Z

I would like to find out two things:

1. Who is making those expensive nb::module_::import_() calls? Is it a specific type caster in nanobind that is calling nb::module_::import_? Is it ndarray? Anything else? Or are the imports from MLIR code?
2. If nanobind were to offer a cached nb::import variant, how should it be implemented? Just keep an nb::dict in the internals to potentially avoid the call to the CPython module import function? Actually, CPython does not really import a module if it is already imported. I would assume that CPython likewise does a dictionary lookup and turns this into a no-op, so we would be duplicating existing functionality. So how can it take so long?

llvmbot added the mlir label Sep 21, 2025

hawkinsp force-pushed the inits branch from 4006133 to 53e772e Compare September 21, 2025 19:24

ftynse approved these changes Sep 22, 2025

View reviewed changes

[mlir][python] Cache import of ir module in type casters.

84812a5

In a JAX benchmark that traces a large language model, this change reduces the time spent in nanobind::module_::import_ from 1.2s to 10ms.

hawkinsp force-pushed the inits branch from 53e772e to 84812a5 Compare September 22, 2025 12:27

jpienaar merged commit b1e00f6 into llvm:main Sep 24, 2025
9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[mlir][python] Cache import of ir module in type casters. #160000

[mlir][python] Cache import of ir module in type casters. #160000

hawkinsp commented Sep 21, 2025

Uh oh!

llvmbot commented Sep 21, 2025

Uh oh!

github-actions bot commented Sep 21, 2025 •

edited

Loading

Uh oh!

ftynse left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

hawkinsp commented Sep 22, 2025

Uh oh!

Uh oh!

wjakob commented Oct 13, 2025

Uh oh!

hawkinsp commented Oct 13, 2025 •

edited

Loading

Uh oh!

wjakob commented Oct 14, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

[mlir][python] Cache import of ir module in type casters. #160000

[mlir][python] Cache import of ir module in type casters. #160000

Conversation

hawkinsp commented Sep 21, 2025

Uh oh!

llvmbot commented Sep 21, 2025

Uh oh!

github-actions bot commented Sep 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ftynse left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

hawkinsp commented Sep 22, 2025

Uh oh!

Uh oh!

wjakob commented Oct 13, 2025

Uh oh!

hawkinsp commented Oct 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

wjakob commented Oct 14, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

github-actions bot commented Sep 21, 2025 •

edited

Loading

hawkinsp commented Oct 13, 2025 •

edited

Loading