From f9d02b053b9e93a93c294bbbd96b85d599f9d50e Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Wed, 8 Oct 2025 16:04:35 -0700
Subject: [PATCH 1/2] remove extra libtorch dependency

This diff removes the extra libtorch dependency from the CUDA delegate and
introduces the missing bf16 and int64 shim layers.

Differential Revision: [D84207378](https://our.internmc.facebook.com/intern/diff/D84207378/)

ghstack-source-id: 314984331
Pull Request resolved: https://github.com/pytorch/executorch/pull/14919
---
 backends/aoti/CMakeLists.txt   | 2 +-
 backends/aoti/common_shims.cpp | 9 ++++++++-
 backends/aoti/common_shims.h   | 2 ++
 backends/cuda/CMakeLists.txt   | 5 +----
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backends/aoti/CMakeLists.txt b/backends/aoti/CMakeLists.txt
index ce364f2c4b0..845144af50f 100644
--- a/backends/aoti/CMakeLists.txt
+++ b/backends/aoti/CMakeLists.txt
@@ -40,7 +40,7 @@ target_compile_options(aoti_common PUBLIC -fexceptions -frtti -fPIC)
 # Ensure symbols are exported properly
 target_link_options(aoti_common PUBLIC -Wl,--export-dynamic)
 
-# Link against PyTorch libraries and standard libraries
+# Link against ExecuTorch libraries and standard libraries
 target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS})
 
 executorch_target_link_options_shared_lib(aoti_common)
diff --git a/backends/aoti/common_shims.cpp b/backends/aoti/common_shims.cpp
index 2f9b36e3c4f..abc83779443 100644
--- a/backends/aoti/common_shims.cpp
+++ b/backends/aoti/common_shims.cpp
@@ -127,11 +127,18 @@ int32_t aoti_torch_layout_strided() {
 }
 
 // Dtype constants - these return the PyTorch dtype codes
-// Currently only float32 is supported, but using robust enum-based approach
 int32_t aoti_torch_dtype_float32() {
   return 6; // PyTorch's float32 dtype code
 }
 
+int32_t aoti_torch_dtype_bfloat16() {
+  return 15; // PyTorch's bfloat16 dtype code
+}
+
+int32_t aoti_torch_dtype_int64() {
+  return 4; // PyTorch's int64 dtype code
+}
+
 // Cleanup functions
 void cleanup_tensor_metadata() {
   internal::tensor_to_sizes.clear();
diff --git a/backends/aoti/common_shims.h b/backends/aoti/common_shims.h
index ffcbaa11a08..5f54cd1c878 100644
--- a/backends/aoti/common_shims.h
+++ b/backends/aoti/common_shims.h
@@ -58,6 +58,8 @@ AOTITorchError aoti_torch_get_dim(Tensor* tensor, int64_t* ret_dim);
 int32_t aoti_torch_device_type_cpu();
 int32_t aoti_torch_layout_strided();
 int32_t aoti_torch_dtype_float32();
+int32_t aoti_torch_dtype_bfloat16();
+int32_t aoti_torch_dtype_int64();
 
 // Autograd mode functions
 int32_t aoti_torch_grad_mode_is_enabled();
diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt
index dc5b1b786f8..575f676e4cc 100644
--- a/backends/cuda/CMakeLists.txt
+++ b/backends/cuda/CMakeLists.txt
@@ -55,10 +55,7 @@ target_link_options(aoti_cuda PUBLIC -Wl,--export-dynamic)
 
 # Link against CUDA::cudart, common AOTI library, and PyTorch CUDA libraries
 target_link_libraries(
-  aoti_cuda
-  PUBLIC aoti_common CUDA::cudart ${CMAKE_DL_LIBS}
-         # Link PyTorch libraries for AOTI CUDA functions
-         ${TORCH_LIBRARIES}
+  aoti_cuda PUBLIC aoti_common CUDA::cudart ${CMAKE_DL_LIBS}
 )
 # If you need other CUDA libraries, link them similarly:
 # target_link_libraries(aoti_cuda PUBLIC CUDA::cublas CUDA::cufft ...)

From 81f1fdd186060003baf3df1e14ec6a0cbb8e2889 Mon Sep 17 00:00:00 2001
From: pytorchbot
Date: Thu, 9 Oct 2025 01:16:33 -0400
Subject: [PATCH 2/2] nested namespace in cuda runtime (#14929)

This PR was created by the merge bot to help merge the original PR into the main branch.
ghstack PR number: https://github.com/pytorch/executorch/pull/14920 by @Gasoonjia
^ Please use this as the source of truth for the PR details, comments, and reviews
ghstack PR base: https://github.com/pytorch/executorch/tree/gh/gasoonjia/52/base
ghstack PR head: https://github.com/pytorch/executorch/tree/gh/gasoonjia/52/head
Merge bot PR base: https://github.com/pytorch/executorch/tree/gh/gasoonjia/51/orig
Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/gasoonjia/52/orig

Differential Revision: [D84201518](https://our.internmc.facebook.com/intern/diff/D84201518/)

@diff-train-skip-merge

Co-authored-by: gasoonjia
---
 backends/cuda/runtime/cuda_backend.cpp           | 11 ++++-------
 backends/cuda/runtime/guard.cpp                  |  8 ++------
 backends/cuda/runtime/guard.h                    |  8 ++------
 backends/cuda/runtime/shims/cuda_guard.cpp       |  8 ++------
 backends/cuda/runtime/shims/cuda_guard.h         |  8 ++------
 backends/cuda/runtime/shims/memory.cpp           |  8 ++------
 backends/cuda/runtime/shims/memory.h             |  8 ++------
 backends/cuda/runtime/shims/tensor_attribute.cpp |  8 ++------
 backends/cuda/runtime/shims/tensor_attribute.h   |  8 ++------
 backends/cuda/runtime/utils.h                    |  8 ++------
 10 files changed, 22 insertions(+), 61 deletions(-)

diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp
index e10322ad40c..58ab54e1aac 100644
--- a/backends/cuda/runtime/cuda_backend.cpp
+++ b/backends/cuda/runtime/cuda_backend.cpp
@@ -26,9 +26,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 #define LOAD_SYMBOL(name, handle) \
   do { \
@@ -335,14 +333,13 @@ class ET_EXPERIMENTAL CudaBackend final
   }
 };
 
-} // namespace cuda
+} // namespace executorch::backends::cuda
 
+namespace executorch::backends {
 namespace {
 auto cls = cuda::CudaBackend();
 executorch::runtime::Backend backend{"CudaBackend", &cls};
 static executorch::runtime::Error success_with_compiler =
     register_backend(backend);
 } // namespace
-
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends
diff --git a/backends/cuda/runtime/guard.cpp b/backends/cuda/runtime/guard.cpp
index 885efc7670d..674cc6387b3 100644
--- a/backends/cuda/runtime/guard.cpp
+++ b/backends/cuda/runtime/guard.cpp
@@ -9,9 +9,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 namespace {
 // Thread-local stream storage (private to this file)
@@ -146,6 +144,4 @@ Result CUDAStreamGuard::create(
   return stream_guard;
 }
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/guard.h b/backends/cuda/runtime/guard.h
index 4e5a18a4c0f..3f187000f90 100644
--- a/backends/cuda/runtime/guard.h
+++ b/backends/cuda/runtime/guard.h
@@ -14,9 +14,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 using executorch::runtime::Error;
 using executorch::runtime::Result;
@@ -190,6 +188,4 @@ class CUDAStreamGuard {
   DeviceIndex device_index_;
 };
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/cuda_guard.cpp b/backends/cuda/runtime/shims/cuda_guard.cpp
index 5740d0bf654..bb07acc7ffa 100644
--- a/backends/cuda/runtime/shims/cuda_guard.cpp
+++ b/backends/cuda/runtime/shims/cuda_guard.cpp
@@ -8,9 +8,7 @@
 
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 extern "C" {
 
@@ -104,6 +102,4 @@ AOTITorchError aoti_torch_get_current_cuda_stream(
 
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/cuda_guard.h b/backends/cuda/runtime/shims/cuda_guard.h
index 6da869064a7..f930f3df643 100644
--- a/backends/cuda/runtime/shims/cuda_guard.h
+++ b/backends/cuda/runtime/shims/cuda_guard.h
@@ -13,9 +13,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 using executorch::backends::aoti::AOTITorchError;
 
@@ -99,6 +97,4 @@ AOTITorchError aoti_torch_get_current_cuda_stream(
 
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp
index a054169330b..b8e3dc8e21b 100644
--- a/backends/cuda/runtime/shims/memory.cpp
+++ b/backends/cuda/runtime/shims/memory.cpp
@@ -19,9 +19,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 using executorch::aten::SizesType;
 using executorch::aten::StridesType;
@@ -659,6 +657,4 @@ AOTITorchError aoti_torch__reinterpret_tensor(
 
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/memory.h b/backends/cuda/runtime/shims/memory.h
index bcec6621285..7a8d4c3609b 100644
--- a/backends/cuda/runtime/shims/memory.h
+++ b/backends/cuda/runtime/shims/memory.h
@@ -12,9 +12,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 using executorch::backends::aoti::AOTITorchError;
 using executorch::backends::aoti::Tensor;
@@ -145,6 +143,4 @@ aoti_torch_copy_(Tensor* self, Tensor* src, int32_t non_blocking);
 void clear_all_tensors();
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/tensor_attribute.cpp b/backends/cuda/runtime/shims/tensor_attribute.cpp
index 5b640b7a9e8..1a14c79f9f2 100644
--- a/backends/cuda/runtime/shims/tensor_attribute.cpp
+++ b/backends/cuda/runtime/shims/tensor_attribute.cpp
@@ -8,9 +8,7 @@
 
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 extern "C" {
 
@@ -31,6 +29,4 @@ int32_t aoti_torch_device_type_cuda() {
 
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/shims/tensor_attribute.h b/backends/cuda/runtime/shims/tensor_attribute.h
index e99958b4f0c..15a4e397d24 100644
--- a/backends/cuda/runtime/shims/tensor_attribute.h
+++ b/backends/cuda/runtime/shims/tensor_attribute.h
@@ -12,9 +12,7 @@
 #include
 #include
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 // Common using declarations for ExecutorTorch types
 using executorch::runtime::Error;
@@ -35,6 +33,4 @@ int32_t aoti_torch_device_type_cuda();
 
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
diff --git a/backends/cuda/runtime/utils.h b/backends/cuda/runtime/utils.h
index 02c3abfc83f..2d805724090 100644
--- a/backends/cuda/runtime/utils.h
+++ b/backends/cuda/runtime/utils.h
@@ -34,9 +34,7 @@
 #define ET_CUDA_KERNEL_LAUNCH_CHECK_OR_RETURN_ERROR() \
   ET_CUDA_CHECK_OR_RETURN_ERROR(cudaGetLastError())
 
-namespace executorch {
-namespace backends {
-namespace cuda {
+namespace executorch::backends::cuda {
 
 // Enum for supported data types in et-cuda backend
 enum class SupportedDTypes : int32_t {
@@ -125,6 +123,4 @@ inline AOTITorchError validate_dtype(int32_t dtype) {
 }
 } // extern "C"
 
-} // namespace cuda
-} // namespace backends
-} // namespace executorch
+} // namespace executorch::backends::cuda
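
Note for reviewers (illustration only, not part of the patch): the dtype shims added in the first commit simply return PyTorch's integer ScalarType codes, so AOTI-generated code can name float32, bfloat16, and int64 without going through libtorch. A minimal standalone sketch of that contract follows; the stub bodies and the main() check are assumptions for illustration, not ExecuTorch code.

// Hypothetical standalone check, not part of the ExecuTorch tree.
// The stubs mirror the values added in backends/aoti/common_shims.cpp above.
#include <cassert>
#include <cstdint>

int32_t aoti_torch_dtype_float32() {
  return 6; // PyTorch's float32 dtype code
}
int32_t aoti_torch_dtype_bfloat16() {
  return 15; // PyTorch's bfloat16 dtype code
}
int32_t aoti_torch_dtype_int64() {
  return 4; // PyTorch's int64 dtype code
}

int main() {
  // Callers pass these integer codes around as plain int32_t values;
  // here we only verify that the constants match PyTorch's ScalarType codes.
  assert(aoti_torch_dtype_float32() == 6);
  assert(aoti_torch_dtype_bfloat16() == 15);
  assert(aoti_torch_dtype_int64() == 4);
  return 0;
}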