diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 1d7980d694fa53..c4e49c7a8020d4 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -314,7 +314,7 @@ hsa_status_t isValidMemoryPool(hsa_amd_memory_pool_t MemoryPool) { return (AllocAllowed && Size > 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; } -hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) { +hsa_status_t addMemoryPool(hsa_amd_memory_pool_t MemoryPool, void *Data) { std::vector *Result = static_cast *>(Data); @@ -323,66 +323,10 @@ hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) { return err; } - uint32_t GlobalFlags = 0; - err = hsa_amd_memory_pool_get_info( - MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags); - if (err != HSA_STATUS_SUCCESS) { - DP("Get memory pool info failed: %s\n", get_error_string(err)); - return err; - } - - if ((GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) && - (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT)) { - Result->push_back(MemoryPool); - } - - return HSA_STATUS_SUCCESS; -} - -template -hsa_status_t collectMemoryPools(const std::vector &Agents, - AccumulatorFunc Func) { - for (int DeviceId = 0; DeviceId < Agents.size(); DeviceId++) { - hsa_status_t Err = hsa::amd_agent_iterate_memory_pools( - Agents[DeviceId], [&](hsa_amd_memory_pool_t MemoryPool) { - hsa_status_t Err; - if ((Err = isValidMemoryPool(MemoryPool)) != HSA_STATUS_SUCCESS) { - DP("Skipping memory pool: %s\n", get_error_string(Err)); - } else - Func(MemoryPool, DeviceId); - return HSA_STATUS_SUCCESS; - }); - - if (Err != HSA_STATUS_SUCCESS) { - DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Iterate all memory pools", get_error_string(Err)); - return Err; - } - } - + Result->push_back(MemoryPool); return HSA_STATUS_SUCCESS; } -std::pair -FindKernargPool(const std::vector &HSAAgents) { - std::vector KernArgPools; - for (const auto &Agent : HSAAgents) { - hsa_status_t err = HSA_STATUS_SUCCESS; - err = hsa_amd_agent_iterate_memory_pools( - Agent, addKernArgPool, static_cast(&KernArgPools)); - if (err != HSA_STATUS_SUCCESS) { - DP("addKernArgPool returned %s, continuing\n", get_error_string(err)); - } - } - - if (KernArgPools.empty()) { - DP("Unable to find any valid kernarg pool\n"); - return {HSA_STATUS_ERROR, hsa_amd_memory_pool_t{}}; - } - - return {HSA_STATUS_SUCCESS, KernArgPools[0]}; -} - } // namespace } // namespace core @@ -620,49 +564,71 @@ class RTLDeviceInfoTy { return HSA_STATUS_SUCCESS; } - hsa_status_t addHostMemoryPool(hsa_amd_memory_pool_t MemoryPool, - int DeviceId) { - uint32_t GlobalFlags = 0; - hsa_status_t Err = hsa_amd_memory_pool_get_info( - MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags); + hsa_status_t setupDevicePools(const std::vector &Agents) { + for (int DeviceId = 0; DeviceId < Agents.size(); DeviceId++) { + hsa_status_t Err = hsa::amd_agent_iterate_memory_pools( + Agents[DeviceId], [&](hsa_amd_memory_pool_t MemoryPool) { + hsa_status_t ValidStatus = core::isValidMemoryPool(MemoryPool); + if (ValidStatus != HSA_STATUS_SUCCESS) { + DP("Alloc allowed in memory pool check failed: %s\n", + get_error_string(ValidStatus)); + return HSA_STATUS_SUCCESS; + } + return addDeviceMemoryPool(MemoryPool, DeviceId); + }); - if (Err != HSA_STATUS_SUCCESS) { - return Err; + if (Err != HSA_STATUS_SUCCESS) { + DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, + "Iterate all memory pools", get_error_string(Err)); + return Err; + } } + return HSA_STATUS_SUCCESS; + } - uint32_t Size; - Err = hsa_amd_memory_pool_get_info(MemoryPool, - HSA_AMD_MEMORY_POOL_INFO_SIZE, &Size); - if (Err != HSA_STATUS_SUCCESS) { - return Err; - } + hsa_status_t setupHostMemoryPools(std::vector &Agents) { + std::vector HostPools; - if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED && - Size > 0) { - HostFineGrainedMemoryPool = MemoryPool; + // collect all the "valid" pools for all the given agents. + for (const auto &Agent : Agents) { + hsa_status_t Err = hsa_amd_agent_iterate_memory_pools( + Agent, core::addMemoryPool, static_cast(&HostPools)); + if (Err != HSA_STATUS_SUCCESS) { + DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, + "Iterate all memory pools", get_error_string(Err)); + return Err; + } } - return HSA_STATUS_SUCCESS; - } + // We need two fine-grained pools. + // 1. One with kernarg flag set for storing kernel arguments + // 2. Second for host allocations + bool FineGrainedMemoryPoolSet = false; + bool KernArgPoolSet = false; + for (const auto &MemoryPool : HostPools) { + hsa_status_t Err = HSA_STATUS_SUCCESS; + uint32_t GlobalFlags = 0; + Err = hsa_amd_memory_pool_get_info( + MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags); + if (Err != HSA_STATUS_SUCCESS) { + DP("Get memory pool info failed: %s\n", get_error_string(Err)); + return Err; + } - hsa_status_t setupMemoryPools() { - using namespace std::placeholders; - hsa_status_t Err; - Err = core::collectMemoryPools( - CPUAgents, std::bind(&RTLDeviceInfoTy::addHostMemoryPool, this, _1, _2)); - if (Err != HSA_STATUS_SUCCESS) { - DP("HSA error in collecting memory pools for CPU: %s\n", - get_error_string(Err)); - return Err; - } - Err = core::collectMemoryPools( - HSAAgents, std::bind(&RTLDeviceInfoTy::addDeviceMemoryPool, this, _1, _2)); - if (Err != HSA_STATUS_SUCCESS) { - DP("HSA error in collecting memory pools for offload devices: %s\n", - get_error_string(Err)); - return Err; + if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) { + if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) { + KernArgPool = MemoryPool; + KernArgPoolSet = true; + } + HostFineGrainedMemoryPool = MemoryPool; + FineGrainedMemoryPoolSet = true; + } } - return HSA_STATUS_SUCCESS; + + if (FineGrainedMemoryPoolSet && KernArgPoolSet) + return HSA_STATUS_SUCCESS; + + return HSA_STATUS_ERROR; } hsa_amd_memory_pool_t getDeviceMemoryPool(int DeviceId) { @@ -731,11 +697,6 @@ class RTLDeviceInfoTy { } else { DP("There are %d devices supporting HSA.\n", NumberOfDevices); } - std::tie(err, KernArgPool) = core::FindKernargPool(CPUAgents); - if (err != HSA_STATUS_SUCCESS) { - DP("Error when reading memory pools\n"); - return; - } // Init the device info HSAQueues.resize(NumberOfDevices); @@ -753,9 +714,15 @@ class RTLDeviceInfoTy { DeviceCoarseGrainedMemoryPools.resize(NumberOfDevices); DeviceFineGrainedMemoryPools.resize(NumberOfDevices); - err = setupMemoryPools(); + err = setupDevicePools(HSAAgents); + if (err != HSA_STATUS_SUCCESS) { + DP("Setup for Device Memory Pools failed\n"); + return; + } + + err = setupHostMemoryPools(CPUAgents); if (err != HSA_STATUS_SUCCESS) { - DP("Error when setting up memory pools"); + DP("Setup for Host Memory Pools failed\n"); return; }