From 72195f5ea1b056cbb7c40a804f5ff78f3349212b Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 6 Oct 2025 12:00:49 -0500 Subject: [PATCH] [Offload] Remove check on kernel argument sizes Summary: This check is unnecessarily restrictive and currently incorrectly fires for any size less than eight bytes. Just remove it, we do sanity checks elsewhere and at some point need to trust the ABI. --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 5 ----- offload/unittests/OffloadAPI/device_code/CMakeLists.txt | 2 ++ offload/unittests/OffloadAPI/device_code/byte.cpp | 3 +++ offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 offload/unittests/OffloadAPI/device_code/byte.cpp diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index f73fa0475a3a7..8d2f9755e0351 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -3687,11 +3687,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams, AsyncInfoWrapperTy &AsyncInfoWrapper) const { - if (ArgsSize != LaunchParams.Size && - ArgsSize > LaunchParams.Size + getImplicitArgsSize()) - return Plugin::error(ErrorCode::INVALID_ARGUMENT, - "invalid kernel arguments size"); - AMDGPUPluginTy &AMDGPUPlugin = static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin); AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice(); diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 50e430597e646..1a042e1b38315 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -2,6 +2,7 @@ add_offload_test_device_code(foo.cpp foo) add_offload_test_device_code(bar.cpp bar) # Compile with optimizations to eliminate AMDGPU implicit arguments. 
add_offload_test_device_code(noargs.cpp noargs -O3) +add_offload_test_device_code(byte.cpp byte) add_offload_test_device_code(localmem.cpp localmem) add_offload_test_device_code(localmem_reduction.cpp localmem_reduction) add_offload_test_device_code(localmem_static.cpp localmem_static) @@ -14,6 +15,7 @@ add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin + byte.bin localmem.bin localmem_reduction.bin localmem_static.bin diff --git a/offload/unittests/OffloadAPI/device_code/byte.cpp b/offload/unittests/OffloadAPI/device_code/byte.cpp new file mode 100644 index 0000000000000..779d120fefcaf --- /dev/null +++ b/offload/unittests/OffloadAPI/device_code/byte.cpp @@ -0,0 +1,3 @@ +#include <gpuintrin.h> + +extern "C" __gpu_kernel void byte(unsigned char c) { (void)c; } diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp index 1dac8c50271b5..c9eca36a4d447 100644 --- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp +++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp @@ -55,6 +55,7 @@ struct LaunchSingleKernelTestBase : LaunchKernelTestBase { KERNEL_TEST(Foo, foo) KERNEL_TEST(NoArgs, noargs) +KERNEL_TEST(Byte, byte) KERNEL_TEST(LocalMem, localmem) KERNEL_TEST(LocalMemReduction, localmem_reduction) KERNEL_TEST(LocalMemStatic, localmem_static)