Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions offload/liboffload/API/Device.td
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def ol_device_info_t : Enum {
TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
TaggedEtor<"WORK_GROUP_LOCAL_MEM_SIZE", "uint64_t", "The maximum size of local shared memory per work group in bytes">,
];
list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
Expand Down
8 changes: 8 additions & 0 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,13 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
return Info.write(static_cast<uint32_t>(Value));
}

case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE: {
if (!std::holds_alternative<uint64_t>(Entry->Value))
return makeError(ErrorCode::BACKEND_FAILURE,
"plugin returned incorrect type");
return Info.write(std::get<uint64_t>(Entry->Value));
}

case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
// {x, y, z} triples
Expand Down Expand Up @@ -590,6 +597,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE:
return Info.write<uint64_t>(0);
default:
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
Expand Down
13 changes: 13 additions & 0 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2186,6 +2186,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;

// Retrieve the size of the group memory.
for (const auto *Pool : AllMemoryPools) {
if (Pool->isGroup()) {
if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE,
MaxBlockSharedMemSize))
return Err;
break;
}
}

return Plugin::success();
}

Expand Down Expand Up @@ -2923,6 +2933,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Status == HSA_STATUS_SUCCESS)
Info.add("Cacheline Size", TmpUInt);

Info.add("Max Shared Memory per Work Group", MaxBlockSharedMemSize, "bytes",
DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max Clock Freq", TmpUInt, "MHz",
Expand Down
7 changes: 7 additions & 0 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Get the unique identifier of the device.
const char *getDeviceUid() const { return DeviceUid.c_str(); }

/// Get the total shared memory per block (in bytes) that can be used in any
/// kernel.
size_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }

/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
Expand Down Expand Up @@ -1251,6 +1255,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif

/// The total per-block native shared memory that a kernel may use.
size_t MaxBlockSharedMemSize = 0;
};

/// Class implementing common functionalities of offload plugins. Each plugin
Expand Down
12 changes: 8 additions & 4 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;
HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize);

uint32_t MaxSharedMem;
if (auto Err = getDeviceAttr(
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem))
return Err;
MaxBlockSharedMemSize = MaxSharedMem;

return Plugin::success();
}

Expand Down Expand Up @@ -1089,10 +1095,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Total Constant Memory", TmpInt, "bytes");

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
TmpInt);
if (Res == CUDA_SUCCESS)
Info.add("Max Shared Memory per Block", TmpInt, "bytes");
Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes",
DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
if (Res == CUDA_SUCCESS)
Expand Down
3 changes: 3 additions & 0 deletions offload/tools/deviceinfo/llvm-offload-device-info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
S, D, OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, "Max Mem Allocation Size", "B"));
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
"Global Mem Size", "B"));
OFFLOAD_ERR(
printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
"Work Group Shared Mem Size", "B"));
OFFLOAD_ERR(
(printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,
Expand Down
5 changes: 5 additions & 0 deletions offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE, 0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);

TEST_P(olGetDeviceInfoTest, InvalidNullHandleDevice) {
ol_device_type_t DeviceType;
Expand Down
2 changes: 2 additions & 0 deletions offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(MaxMemAllocSize, uint64_t,
OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(SharedMemSize, uint64_t,
OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);

TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
size_t Size = 0;
Expand Down
Loading