diff --git a/backends/cuda/cuda_backend.py b/backends/cuda/cuda_backend.py index 8ed8cdefbb1..ef98de29f23 100644 --- a/backends/cuda/cuda_backend.py +++ b/backends/cuda/cuda_backend.py @@ -129,6 +129,8 @@ def preprocess( user_input_placeholders.append(node.meta["val"]) options: dict[str, typing.Any] = { + # Keep fp16/bf16 downcast/upcast pairs so compiled numerics match eager mode + "emulate_precision_casts": True, # Embed CUDA kernel binaries directly into the compiled shared object "aot_inductor.embed_kernel_binary": True, # Do not link against the full PyTorch/libtorch library