[Offload] Add device info for shared memory #167817

kevinsala · 2025-11-13T04:18:44Z

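This adds a new device info query, `OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE`, which reports the maximum size of shared memory per work group in bytes. It will be needed by #152831.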

llvmbot · 2025-11-13T04:19:17Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-offload

Author: Kevin Sala Penades (kevinsala)

Changes

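This adds a new device info query, `OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE`, which reports the maximum size of shared memory per work group in bytes. It will be needed by #152831.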

Full diff: https://github.com/llvm/llvm-project/pull/167817.diff

8 Files Affected:

(modified) offload/liboffload/API/Device.td (+1)
(modified) offload/liboffload/src/OffloadImpl.cpp (+9)
(modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+14)
(modified) offload/plugins-nextgen/common/include/PluginInterface.h (+6)
(modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+8-4)
(modified) offload/tools/deviceinfo/llvm-offload-device-info.cpp (+3)
(modified) offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp (+5)
(modified) offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp (+2)

diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index e9c154818c4a1..a918cff6de26e 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -43,6 +43,7 @@ def ol_device_info_t : Enum {
     TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
     TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
     TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
+    TaggedEtor<"WORK_GROUP_SHARED_MEM_SIZE", "uint64_t", "The maximum size of shared memory per work group in bytes">,
   ];
   list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
   list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 84bc414396811..844ba18e3080c 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -495,6 +495,14 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
     return Info.write(static_cast<uint32_t>(Value));
   }
 
+  case OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE: {
+    // Uint64 values
+    if (!std::holds_alternative<uint64_t>(Entry->Value))
+      return makeError(ErrorCode::BACKEND_FAILURE,
+                       "plugin returned incorrect type");
+    return Info.write(std::get<uint64_t>(Entry->Value));
+  }
+
   case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
   case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
     // {x, y, z} triples
@@ -590,6 +598,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
     return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
   case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
   case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
+  case OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE:
     return Info.write<uint64_t>(0);
   default:
     return createOffloadError(ErrorCode::INVALID_ENUMERATION,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 04b394452a448..17d2586dd2d14 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2186,6 +2186,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (auto Err = checkIfAPU())
       return Err;
 
+    // Retrieve the size of the group memory.
+    for (const auto *Pool : AllMemoryPools) {
+      if (Pool->isGroup()) {
+        size_t Size = 0;
+        if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE, Size))
+          return Err;
+        MaxBlockSharedMemSize = Size;
+        break;
+      }
+    }
+
     return Plugin::success();
   }
 
@@ -2923,6 +2934,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (Status == HSA_STATUS_SUCCESS)
       Info.add("Cacheline Size", TmpUInt);
 
+    Info.add("Max Shared Memory per Work Group", MaxBlockSharedMemSize, "bytes",
+             DeviceInfo::WORK_GROUP_SHARED_MEM_SIZE);
+
     Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
     if (Status == HSA_STATUS_SUCCESS)
       Info.add("Max Clock Freq", TmpUInt, "MHz",
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 2135e0608323e..b900f1b728736 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -794,6 +794,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// Get the unique identifier of the device.
   const char *getDeviceUid() const { return DeviceUid.c_str(); }
 
+  /// Get the total shared memory per block that can be used in any kernel.
+  uint32_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
+
   /// Set the context of the device if needed, before calling device-specific
   /// functions. Plugins may implement this function as a no-op if not needed.
   virtual Error setContext() = 0;
@@ -1251,6 +1254,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// Internal representation for OMPT device (initialize & finalize)
   std::atomic<bool> OmptInitialized;
 #endif
+
+  /// The total per-block native shared memory that a kernel may use.
+  uint32_t MaxBlockSharedMemSize = 0;
 };
 
 /// Class implementing common functionalities of offload plugins. Each plugin
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 45e580e7e0cd7..c8e26790f9f41 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -379,6 +379,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
       return Err;
     HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize);
 
+    uint32_t MaxSharedMem;
+    if (auto Err = getDeviceAttr(
+            CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem))
+      return Err;
+    MaxBlockSharedMemSize = MaxSharedMem;
+
     return Plugin::success();
   }
 
@@ -1089,10 +1095,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
     if (Res == CUDA_SUCCESS)
       Info.add("Total Constant Memory", TmpInt, "bytes");
 
-    Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
-                           TmpInt);
-    if (Res == CUDA_SUCCESS)
-      Info.add("Max Shared Memory per Block", TmpInt, "bytes");
+    Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes",
+             DeviceInfo::WORK_GROUP_SHARED_MEM_SIZE);
 
     Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
     if (Res == CUDA_SUCCESS)
diff --git a/offload/tools/deviceinfo/llvm-offload-device-info.cpp b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
index 42ffb97d6d77c..75247760a4af3 100644
--- a/offload/tools/deviceinfo/llvm-offload-device-info.cpp
+++ b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
@@ -205,6 +205,9 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
       S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
   OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
                                          "Global Mem Size", "B"));
+  OFFLOAD_ERR(printDeviceValue<uint64_t>(
+      S, D, OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE,
+      "Work Group Shared Mem Size", "B"));
   OFFLOAD_ERR(
       (printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
           S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index 30eafee026316..b0d8ea7faea5e 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -217,6 +217,11 @@ OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(GlobalMemSize, uint64_t,
                                     OL_DEVICE_INFO_GLOBAL_MEM_SIZE, 0);
 OL_DEVICE_INFO_TEST_HOST_SUCCESS(GlobalMemSize, uint64_t,
                                  OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
+OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(SharedMemSize, uint64_t,
+                                    OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE,
+                                    0);
+OL_DEVICE_INFO_TEST_HOST_SUCCESS(SharedMemSize, uint64_t,
+                                 OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE);
 
 TEST_P(olGetDeviceInfoTest, InvalidNullHandleDevice) {
   ol_device_type_t DeviceType;
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index 79a18c1d133dc..11d20004e91fb 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -71,6 +71,8 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(MaxMemAllocSize, uint64_t,
                             OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE);
 OL_DEVICE_INFO_SIZE_TEST_EQ(GlobalMemSize, uint64_t,
                             OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
+OL_DEVICE_INFO_SIZE_TEST_EQ(SharedMemSize, uint64_t,
+                            OL_DEVICE_INFO_WORK_GROUP_SHARED_MEM_SIZE);
 
 TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
   size_t Size = 0;

offload/liboffload/API/Device.td

offload/liboffload/src/OffloadImpl.cpp

offload/plugins-nextgen/common/include/PluginInterface.h

ro-i

LGTM in general, but probably best to see if Joseph has any further comments :)

offload/plugins-nextgen/common/include/PluginInterface.h

kevinsala · 2025-11-13T18:39:51Z

All review comments have been addressed now.

This reverts commit 1a86f0a.

[Offload] Add device info for shared memory

7154919

kevinsala requested review from RossBrunton, jhuber6 and ro-i November 13, 2025 04:18

llvmbot added backend:AMDGPU offload labels Nov 13, 2025

jhuber6 reviewed Nov 13, 2025

View reviewed changes

offload/liboffload/API/Device.td Outdated Show resolved Hide resolved

offload/liboffload/src/OffloadImpl.cpp Outdated Show resolved Hide resolved

offload/plugins-nextgen/common/include/PluginInterface.h Outdated Show resolved Hide resolved

Fix review comment

a41ad51

ro-i approved these changes Nov 13, 2025

View reviewed changes

offload/plugins-nextgen/common/include/PluginInterface.h Outdated Show resolved Hide resolved

offload/plugins-nextgen/common/include/PluginInterface.h Outdated Show resolved Hide resolved

Fix review comments

6745dd5

jhuber6 approved these changes Nov 13, 2025

View reviewed changes

kevinsala merged commit 1a86f0a into llvm:main Nov 13, 2025
10 checks passed

ronlieb added a commit to ROCm/llvm-project that referenced this pull request Nov 13, 2025

Revert "[Offload] Add device info for shared memory (llvm#167817)"

67a60f8

This reverts commit 1a86f0a.

ronlieb added a commit to ROCm/llvm-project that referenced this pull request Nov 14, 2025

Revert "[Offload] Add device info for shared memory (llvm#167817)"

143e2c6

This reverts commit 1a86f0a.

ronlieb added a commit to ROCm/llvm-project that referenced this pull request Nov 14, 2025

Revert "[Offload] Add device info for shared memory (llvm#167817)"

6e3feaf

This reverts commit 1a86f0a.

ronlieb pushed a commit to ROCm/llvm-project that referenced this pull request Nov 14, 2025

[Offload] Add device info for shared memory (llvm#167817)

bc8423d

ronlieb added a commit to ROCm/llvm-project that referenced this pull request Nov 14, 2025

Revert "[Offload] Add device info for shared memory (llvm#167817)"

89671db

This reverts commit 1a86f0a.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Offload] Add device info for shared memory #167817

[Offload] Add device info for shared memory #167817

Uh oh!

kevinsala commented Nov 13, 2025

Uh oh!

llvmbot commented Nov 13, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

ro-i left a comment

Uh oh!

Uh oh!

Uh oh!

kevinsala commented Nov 13, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[Offload] Add device info for shared memory #167817

[Offload] Add device info for shared memory #167817

Uh oh!

Conversation

kevinsala commented Nov 13, 2025

Uh oh!

llvmbot commented Nov 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

ro-i left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

kevinsala commented Nov 13, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Nov 13, 2025 •

edited

Loading