backends/aoti/utils.h (19 additions, 0 deletions)

@@ -36,6 +36,8 @@ inline executorch::aten::ScalarType dtype_to_scalar_type(int32_t dtype) {
   switch (dtype) {
     case 6: // PyTorch's float32 dtype code
       return executorch::aten::ScalarType::Float;
+    case 15: // PyTorch's bfloat16 dtype code
+      return executorch::aten::ScalarType::BFloat16;
     // Future support for additional dtypes can be added here
     default:
       ET_LOG(Error, "Unsupported dtype: %d for ScalarType conversion", dtype);
@@ -71,6 +73,23 @@ inline AOTITorchError validate_storage_offset(int64_t storage_offset) {
   return Error::Ok;
 }
 
+// Check if tensor is in contiguous memory format (NCHW for 4D tensors)
+// Contiguous format means strides decrease from left to right:
+// For NCHW: strides = [C*H*W, H*W, W, 1]
+inline bool is_tensor_contiguous(
+    int64_t ndim,
+    const int64_t* sizes,
+    const int64_t* strides) {
+  int64_t expected_stride = 1;
+  for (int64_t i = ndim - 1; i >= 0; i--) {
+    if (strides[i] != expected_stride) {
+      return false;
+    }
+    expected_stride *= sizes[i];
+  }
+  return true;
+}
+
 } // extern "C"
 
 } // namespace aoti
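To illustrate the two additions, here is a minimal, hypothetical usage sketch that is not part of the PR. The include path, the namespace qualification, and the example sizes/strides are assumptions inferred from the repo layout shown in this diff; only dtype_to_scalar_type() and is_tensor_contiguous() come from the change itself:

// Hypothetical usage of the two helpers added above; include path and
// namespace line are assumptions, not verified against the repo.
#include <executorch/backends/aoti/utils.h>

#include <cstdint>
#include <cstdio>

using namespace executorch::backends::aoti;  // assumed enclosing namespaces

int main() {
  // PyTorch dtype code 15 now maps to BFloat16 instead of hitting the error path.
  auto st = dtype_to_scalar_type(15);
  std::printf("%d\n", st == executorch::aten::ScalarType::BFloat16);  // prints 1

  // Contiguity check for a 2x3x4x5 tensor, dims in NCHW order.
  const int64_t sizes[4] = {2, 3, 4, 5};
  const int64_t nchw_strides[4] = {60, 20, 5, 1};  // C*H*W, H*W, W, 1 -> contiguous
  const int64_t nhwc_strides[4] = {60, 1, 15, 3};  // channels-last -> not contiguous

  std::printf("%d\n", is_tensor_contiguous(4, sizes, nchw_strides));  // prints 1
  std::printf("%d\n", is_tensor_contiguous(4, sizes, nhwc_strides));  // prints 0
  return 0;
}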
backends/cuda/runtime/TARGETS (new file: 32 additions, 0 deletions)

@@ -0,0 +1,32 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+oncall("executorch")
+
+runtime.cxx_library(
+    name = "runtime_shims",
+    srcs = [
+        "shims/memory.cpp",
+        "shims/tensor_attribute.cpp",
+    ],
+    headers = [
+        "shims/memory.h",
+        "shims/tensor_attribute.h",
+        "shims/utils.h",
+    ],
+    # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
+    link_whole = True,
+    supports_python_dlopen = True,
+    # Constructor needed for backend registration.
+    compiler_flags = ["-Wno-global-constructors"],
+    visibility = ["@EXECUTORCH_CLIENTS"],
+    deps = [
+        "//executorch/backends/aoti:common_shims",
+        "//executorch/extension/tensor:tensor",
+        "//executorch/runtime/core:core",
+        "//executorch/runtime/core/exec_aten:lib",
+        "//executorch/runtime/platform:platform",
+    ],
+    external_deps = [
+        ("cuda", None, "cuda-lazy"),
+    ],
+)
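The combination of `link_whole = True` and `-Wno-global-constructors` here follows the usual self-registration pattern: a global object's constructor registers the backend at load time, so the object file must be kept even though nothing references it directly. A minimal C++ sketch of that pattern is below; the type and function names are hypothetical and do not reflect the actual shim code:

// Hypothetical illustration of self-registration via a global constructor.
namespace {

// Stand-in for the real registration call into the runtime
// (e.g. something like executorch::runtime::register_backend(...)).
void register_cuda_backend() {}

struct CudaBackendRegistrar {
  CudaBackendRegistrar() {
    register_cuda_backend();  // runs at static-initialization time, before main()
  }
};

// The global object's constructor is the only thing that triggers registration,
// which is why link_whole = True is needed to keep the linker from dropping it.
CudaBackendRegistrar registrar;

}  // namespace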