diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 73e3bd31406ba..2d1ca52aad227 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -1721,15 +1720,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return OFFLOAD_FAIL; } - if (Kind == TARGET_ALLOC_HOST) { - std::lock_guard Lock(HostAllocationsMutex); - size_t Erased = HostAllocations.erase(TgtPtr); - if (!Erased) { - REPORT("Cannot find a host allocation in the map\n"); - return OFFLOAD_FAIL; - } - } - return OFFLOAD_SUCCESS; } @@ -1779,7 +1769,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { AsyncInfoWrapperTy &AsyncInfoWrapper) override { // Use one-step asynchronous operation when host memory is already pinned. - if (isHostPinnedMemory(HstPtr)) { + if (isHostPinnedMemoryBuffer(HstPtr)) { AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper); return Stream.pushPinnedMemoryCopyAsync(TgtPtr, HstPtr, Size); } @@ -1833,7 +1823,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// Retrieve data from the device (device to host transfer). Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { - if (isHostPinnedMemory(HstPtr)) { + + // Use one-step asynchronous operation when host memory is already pinned. + if (isHostPinnedMemoryBuffer(HstPtr)) { // Use one-step asynchronous operation when host memory is already pinned. AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper); return Stream.pushPinnedMemoryCopyAsync(HstPtr, TgtPtr, Size); @@ -2005,23 +1997,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Queues[Current % Queues.size()]; } - /// Check whether a buffer is a host pinned buffer. - bool isHostPinnedMemory(const void *Ptr) const { - bool Found = false; - HostAllocationsMutex.lock_shared(); - if (!HostAllocations.empty()) { - auto It = HostAllocations.lower_bound((const void *)Ptr); - if (It != HostAllocations.end() && It->first == Ptr) { - Found = true; - } else if (It != HostAllocations.begin()) { - --It; - Found = ((const char *)It->first + It->second > (const char *)Ptr); - } - } - HostAllocationsMutex.unlock_shared(); - return Found; - } - private: using AMDGPUStreamRef = AMDGPUResourceRef; using AMDGPUEventRef = AMDGPUResourceRef; @@ -2073,12 +2048,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// List of device packet queues. std::vector Queues; - - /// Map of host pinned allocations. We track these pinned allocations so that - /// memory transfers involving these allocations do not need a two-step copy - /// with an intermediate pinned buffer. - std::map HostAllocations; - mutable std::shared_mutex HostAllocationsMutex; }; Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) { @@ -2537,10 +2506,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) { REPORT("%s\n", toString(std::move(Err)).data()); return nullptr; } - - // Keep track of the host pinned allocations for optimizations in transfers. - std::lock_guard Lock(HostAllocationsMutex); - HostAllocations.insert({Alloc, Size}); } return Alloc; diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index 6ce6c29481c55..9f52562dc9571 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -356,6 +356,27 @@ Error GenericDeviceTy::registerKernelOffloadEntry( return Plugin::success(); } +Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer, + size_t Size) { + std::lock_guard Lock(HostAllocationsMutex); + + auto Res = HostAllocations.insert({Buffer, Size}); + if (!Res.second) + return Plugin::error("Registering an already registered pinned buffer"); + + return Plugin::success(); +} + +Error GenericDeviceTy::unregisterHostPinnedMemoryBuffer(const void *Buffer) { + std::lock_guard Lock(HostAllocationsMutex); + + size_t Erased = HostAllocations.erase(Buffer); + if (!Erased) + return Plugin::error("Cannot find a registered host pinned buffer"); + + return Plugin::success(); +} + Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) { if (!AsyncInfo || !AsyncInfo->Queue) return Plugin::error("Invalid async info queue"); @@ -391,14 +412,18 @@ Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, return Plugin::error("Failed to allocate from device allocator"); } - // Sucessful and valid allocation. - if (Alloc) - return Alloc; + // Report error if the memory manager or the device allocator did not return + // any memory buffer. + if (!Alloc) + return Plugin::error("Invalid target data allocation kind or requested " + "allocator not implemented yet"); - // At this point means that we did not tried to allocate from the memory - // manager nor the device allocator. - return Plugin::error("Invalid target data allocation kind or requested " - "allocator not implemented yet"); + // Register allocated buffer as pinned memory if the type is host memory. + if (Kind == TARGET_ALLOC_HOST) + if (auto Err = registerHostPinnedMemoryBuffer(Alloc, Size)) + return Err; + + return Alloc; } Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) { @@ -411,6 +436,11 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) { if (Res) return Plugin::error("Failure to deallocate device pointer %p", TgtPtr); + // Unregister deallocated pinned memory buffer if the type is host memory. + if (Kind == TARGET_ALLOC_HOST) + if (auto Err = unregisterHostPinnedMemoryBuffer(TgtPtr)) + return std::move(Err); + return Plugin::success(); } diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h index 9b89e316551db..45b0fb0637576 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "Debug.h" @@ -405,6 +406,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// setupDeviceEnvironment() function. virtual bool shouldSetupDeviceEnvironment() const { return true; } + /// Register a host buffer as host pinned allocation. + Error registerHostPinnedMemoryBuffer(const void *Buffer, size_t Size); + + /// Unregister a host pinned allocations. + Error unregisterHostPinnedMemoryBuffer(const void *Buffer); + /// Pointer to the memory manager or nullptr if not available. MemoryManagerTy *MemoryManager; @@ -419,7 +426,40 @@ struct GenericDeviceTy : public DeviceAllocatorTy { UInt64Envar OMPX_TargetStackSize; UInt64Envar OMPX_TargetHeapSize; + /// Map of host pinned allocations. We track these pinned allocations so that + /// memory transfers involving these allocations can be optimized. + std::map HostAllocations; + mutable std::shared_mutex HostAllocationsMutex; + protected: + /// Check whether a buffer has been registered as host pinned memory. + bool isHostPinnedMemoryBuffer(const void *Buffer) const { + std::shared_lock Lock(HostAllocationsMutex); + + if (HostAllocations.empty()) + return false; + + // Search the first allocation with starting address that is not less than + // the buffer address. + auto It = HostAllocations.lower_bound(Buffer); + + // Direct match of starting addresses. + if (It != HostAllocations.end() && It->first == Buffer) + return true; + + // Not direct match but may be a previous pinned allocation in the map which + // contains the buffer. Return false if there is no such a previous + // allocation. + if (It == HostAllocations.begin()) + return false; + + // Move to the previous pinned allocation. + --It; + + // Evaluate whether the buffer is contained in the pinned allocation. + return ((const char *)It->first + It->second > (const char *)Buffer); + } + /// Environment variables defined by the LLVM OpenMP implementation /// regarding the initial number of streams and events. UInt32Envar OMPX_InitialNumStreams;