From 8b8fa6c92892fbfc3c9917ae3ccf584b66eadb55 Mon Sep 17 00:00:00 2001 From: Pearu Peterson Date: Sat, 6 Dec 2025 00:12:30 +0200 Subject: [PATCH 1/2] Eliminate sizes, strides, mutable_data_ptr, const_data_ptr ops --- src/libtorchaudio/rnnt/cpu/compute.cpp | 5 +-- src/libtorchaudio/stable/ops.h | 47 ++++---------------------- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/src/libtorchaudio/rnnt/cpu/compute.cpp b/src/libtorchaudio/rnnt/cpu/compute.cpp index 0449b8afcc..d0f9e0e234 100644 --- a/src/libtorchaudio/rnnt/cpu/compute.cpp +++ b/src/libtorchaudio/rnnt/cpu/compute.cpp @@ -114,9 +114,10 @@ std::tuple compute( // when stable ABI Tensor supports mutable_data_ptr templates. Workspace workspace( /*options=*/options, - /*dtype_data=*/reinterpret_cast(float_workspace.data_ptr()), + /*dtype_data=*/ + reinterpret_cast(float_workspace.mutable_data_ptr()), /*dtype_size=*/float_workspace.numel(), - /*int_data=*/reinterpret_cast(int_workspace.data_ptr()), + /*int_data=*/reinterpret_cast(int_workspace.mutable_data_ptr()), /*int_size=*/int_workspace.numel()); THO_DISPATCH_V2( diff --git a/src/libtorchaudio/stable/ops.h b/src/libtorchaudio/stable/ops.h index 1acb24fe7d..81f3a073a7 100644 --- a/src/libtorchaudio/stable/ops.h +++ b/src/libtorchaudio/stable/ops.h @@ -23,46 +23,11 @@ namespace torchaudio::stable { using Layout = int32_t; -// TODO: When sizes and strides are implemented in torch::stable, -// eliminate sizes and strides function below. -inline std::vector sizes(const Tensor& t) { - int64_t* ptr; - TORCH_ERROR_CODE_CHECK(aoti_torch_get_sizes(t.get(), &ptr)); - std::vector r(ptr, ptr + t.dim()); - return r; -} - -inline std::vector strides(const Tensor& t) { - int64_t* ptr; - TORCH_ERROR_CODE_CHECK(aoti_torch_get_strides(t.get(), &ptr)); - std::vector r(ptr, ptr + t.dim()); - return r; -} - -// TODO: When https://github.com/pytorch/pytorch/pull/161891 lands, -// eliminate mutable_data_ptr and const_data_ptr templates. -#define aoti_torch_get_mutable_data_ptr aoti_torch_get_data_ptr -#define aoti_torch_get_const_data_ptr aoti_torch_get_data_ptr -template -T* mutable_data_ptr(const Tensor& t) { - void* data_ptr{}; - TORCH_ERROR_CODE_CHECK(aoti_torch_get_mutable_data_ptr(t.get(), &data_ptr)); - return reinterpret_cast(data_ptr); -} - -template -const T* const_data_ptr(const Tensor& t) { - const void* data_ptr{}; - TORCH_ERROR_CODE_CHECK( - aoti_torch_get_const_data_ptr(t.get(), const_cast(&data_ptr))); - return reinterpret_cast(data_ptr); -} - // TODO: When cpu is implemented in torch::stable, eliminate // cpu function below. inline Tensor cpu(const Tensor& self) { - auto sizes_ = sizes(self); - auto cpu_type = aoti_torch_device_type_cpu(); + auto sizes_ = self.sizes(); + int32_t cpu_type = static_cast(torch::stable::DeviceType::CPU); int32_t dtype; TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(self.get(), &dtype)); int32_t layout; @@ -85,8 +50,8 @@ inline Tensor cpu(const Tensor& self) { // TODO: inline Tensor cuda(const Tensor& self, int32_t cuda_index) { - auto sizes_ = sizes(self); - auto cuda_type = aoti_torch_device_type_cuda(); + auto sizes_ = self.sizes(); + int32_t cuda_type = static_cast(torch::stable::DeviceType::CUDA); int32_t dtype; TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(self.get(), &dtype)); int32_t layout; @@ -169,7 +134,9 @@ T item(const Tensor& self) { STD_TORCH_CHECK( self.numel() == 1, "item requires single element tensor input"); if (self.is_cpu()) { - return torchaudio::stable::const_data_ptr(self)[0]; + // TODO: use `return self.const_data_ptr()[0];` after torch + // stable supports const_data_ptr templates. + return reinterpret_cast(self.const_data_ptr())[0]; #ifdef USE_CUDA } else if (self.is_cuda()) { T value; From b68da9acc7825337bc994d897b059224c6b1ca31 Mon Sep 17 00:00:00 2001 From: Pearu Peterson Date: Sat, 6 Dec 2025 00:36:38 +0200 Subject: [PATCH 2/2] Eliminate new_zeros --- .../forced_align/cpu/compute.cpp | 3 +- src/libtorchaudio/forced_align/gpu/compute.cu | 3 +- src/libtorchaudio/stable/ops.h | 66 ++----------------- src/libtorchaudio/utils.h | 9 +-- 4 files changed, 14 insertions(+), 67 deletions(-) diff --git a/src/libtorchaudio/forced_align/cpu/compute.cpp b/src/libtorchaudio/forced_align/cpu/compute.cpp index 5adb822f66..7decc1a635 100644 --- a/src/libtorchaudio/forced_align/cpu/compute.cpp +++ b/src/libtorchaudio/forced_align/cpu/compute.cpp @@ -208,7 +208,8 @@ std::tuple compute( ScalarType::Long); const auto B = logProbs.size(0); const auto T = logProbs.size(1); - Tensor paths = torchaudio::stable::new_zeros(targets, {B, T}); + Tensor paths = torch::stable::empty({B, T}, targets.scalar_type()); + torch::stable::zero_(paths); THO_DISPATCH_V2( logProbs.scalar_type(), "forced_align_impl", diff --git a/src/libtorchaudio/forced_align/gpu/compute.cu b/src/libtorchaudio/forced_align/gpu/compute.cu index ce3e07ac8b..16d42def74 100644 --- a/src/libtorchaudio/forced_align/gpu/compute.cu +++ b/src/libtorchaudio/forced_align/gpu/compute.cu @@ -294,7 +294,8 @@ std::tuple compute( auto B = logProbs.size(0); auto T = logProbs.size(1); // num frames - Tensor paths = torchaudio::stable::new_zeros(targets, {B, T}, /*dtype=*/std::nullopt, /*layout=*/std::nullopt, /*device=*/torch::stable::DeviceType::CPU); + Tensor paths = torch::stable::empty({B, T}, targets.scalar_type()); + torch::stable::zero_(paths); THO_DISPATCH_V2(logProbs.scalar_type(), "forced_align_impl", AT_WRAP([&] { if (targets.scalar_type() == ScalarType::Long) { diff --git a/src/libtorchaudio/stable/ops.h b/src/libtorchaudio/stable/ops.h index 81f3a073a7..48fe8adae8 100644 --- a/src/libtorchaudio/stable/ops.h +++ b/src/libtorchaudio/stable/ops.h @@ -17,14 +17,12 @@ #include #endif -using torch::stable::Tensor; - namespace torchaudio::stable { -using Layout = int32_t; +using torch::stable::Tensor; -// TODO: When cpu is implemented in torch::stable, eliminate -// cpu function below. +// TODO: When cpu op is implemented in torch::stable, eliminate cpu +// function below. inline Tensor cpu(const Tensor& self) { auto sizes_ = self.sizes(); int32_t cpu_type = static_cast(torch::stable::DeviceType::CPU); @@ -48,7 +46,8 @@ inline Tensor cpu(const Tensor& self) { return result; } -// TODO: +// TODO: When cuda op is implemented in torch::stable, eliminate cuda +// function below. inline Tensor cuda(const Tensor& self, int32_t cuda_index) { auto sizes_ = self.sizes(); int32_t cuda_type = static_cast(torch::stable::DeviceType::CUDA); @@ -72,61 +71,6 @@ inline Tensor cuda(const Tensor& self, int32_t cuda_index) { return result; } -// TODO: remove when torch::stable provides new_zeros -inline Tensor new_zeros( - const Tensor& self, - std::vector size, - std::optional dtype = std::nullopt, - std::optional layout = std::nullopt, - std::optional device = std::nullopt, - std::optional pin_memory = std::nullopt) { - int32_t target_dtype{}; - if (dtype.has_value()) { - target_dtype = torch::stable::detail::to( - torch::stable::detail::from(dtype.value())); - } else { - TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(self.get(), &target_dtype)); - } - - Layout layout_; - if (layout.has_value()) { - layout_ = layout.value(); - } else { - TORCH_ERROR_CODE_CHECK(aoti_torch_get_layout(self.get(), &layout_)); - } - - int32_t device_type; - torch::stable::DeviceIndex device_index = 0; - if (device.has_value()) { - auto device_ = device.value(); - device_type = static_cast(device_.type()); - device_index = device_.index(); - } else { - TORCH_ERROR_CODE_CHECK( - aoti_torch_get_device_type(self.get(), &device_type)); - TORCH_ERROR_CODE_CHECK( - aoti_torch_get_device_index(self.get(), &device_index)); - } - - // TODO: pin_memory - - AtenTensorHandle ret0; - TORCH_ERROR_CODE_CHECK(aoti_torch_aten_new_empty( - self.get(), - size.data(), - static_cast(size.size()), - &target_dtype, - &layout_, - &device_type, - device_index, - nullptr, // pin_memory (nullptr for default) - &ret0)); - - auto result = Tensor(ret0); - torch::stable::zero_(result); - return result; -} - // An analog of item template function defined in // ATen/templates/TensorBody.h template diff --git a/src/libtorchaudio/utils.h b/src/libtorchaudio/utils.h index 725b6c9699..f72c13b1b0 100644 --- a/src/libtorchaudio/utils.h +++ b/src/libtorchaudio/utils.h @@ -4,7 +4,7 @@ // TODO: replace the include libtorchaudio/stable/ops.h with // torch/stable/ops.h when torch::stable provides all required -// features (torch::stable::item or similar): +// features (torch::stable::item et al): #include namespace torchaudio { @@ -25,7 +25,7 @@ using TensorAccessor = torch::headeronly::HeaderOnlyTensorAccessor; // TODO: eliminate accessor(t) in favor of t.accessor // after Tensor::accessor is supported in stable ABI template -inline TensorAccessor accessor(Tensor t) { +inline TensorAccessor accessor(torch::stable::Tensor t) { return TensorAccessor( reinterpret_cast(t.data_ptr()), t.sizes().data(), t.strides().data()); } @@ -42,7 +42,7 @@ using PackedTensorAccessor32 = // TODO: eliminate accessor(t) in favor of t.accessor // after Tensor::accessor is supported in stable ABI template -inline PackedTensorAccessor32 packed_accessor32(Tensor t) { +inline PackedTensorAccessor32 packed_accessor32(torch::stable::Tensor t) { return PackedTensorAccessor32( static_cast::PtrType>(t.data_ptr()), t.sizes().data(), @@ -58,7 +58,8 @@ using PackedTensorAccessorSizeT = size_t>; template -inline PackedTensorAccessorSizeT packed_accessor_size_t(Tensor t) { +inline PackedTensorAccessorSizeT packed_accessor_size_t( + torch::stable::Tensor t) { return PackedTensorAccessorSizeT( static_cast::PtrType>( t.data_ptr()),