Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 34 additions & 43 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,11 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
assert(MemoryManager && "Invalid memory manager");
assert(PtrStorage && "Invalid pointer storage");

*PtrStorage = MemoryManager->allocate(Size, nullptr);
auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr);
if (!PtrStorageOrErr)
return PtrStorageOrErr.takeError();

*PtrStorage = *PtrStorageOrErr;
if (Size && *PtrStorage == nullptr)
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failure to allocate from AMDGPU memory manager");
Expand All @@ -443,15 +447,12 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
private:
/// Allocation callback that will be called once the memory manager does not
/// have more previously allocated buffers.
void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
Expected<void *> allocate(size_t Size, void *HstPtr,
TargetAllocTy Kind) override;

/// Deallocation callback that will be called by the memory manager.
int free(void *TgtPtr, TargetAllocTy Kind) override {
if (auto Err = MemoryPool->deallocate(TgtPtr)) {
consumeError(std::move(Err));
return OFFLOAD_FAIL;
}
return OFFLOAD_SUCCESS;
Error free(void *TgtPtr, TargetAllocTy Kind) override {
return MemoryPool->deallocate(TgtPtr);
}

/// The underlying plugin that owns this memory manager.
Expand Down Expand Up @@ -2339,12 +2340,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}

/// Allocate memory on the device or related to the device.
void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override;

/// Deallocate memory on the device or related to the device.
int free(void *TgtPtr, TargetAllocTy Kind) override {
Error free(void *TgtPtr, TargetAllocTy Kind) override {
if (TgtPtr == nullptr)
return OFFLOAD_SUCCESS;
return Plugin::success();

AMDGPUMemoryPoolTy *MemoryPool = nullptr;
switch (Kind) {
Expand All @@ -2360,17 +2361,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
break;
}

if (!MemoryPool) {
REPORT("No memory pool for the specified allocation kind\n");
return OFFLOAD_FAIL;
}
if (!MemoryPool)
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"no memory pool for the specified allocation kind");

if (Error Err = MemoryPool->deallocate(TgtPtr)) {
REPORT("%s\n", toString(std::move(Err)).data());
return OFFLOAD_FAIL;
}
if (auto Err = MemoryPool->deallocate(TgtPtr))
return Err;

return OFFLOAD_SUCCESS;
return Plugin::success();
}

/// Synchronize current thread with the pending operations on the async info.
Expand Down Expand Up @@ -3813,14 +3811,13 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
}

void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
TargetAllocTy Kind) {
Expected<void *> AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
TargetAllocTy Kind) {
// Allocate memory from the pool.
void *Ptr = nullptr;
if (auto Err = MemoryPool->allocate(Size, &Ptr)) {
consumeError(std::move(Err));
return nullptr;
}
if (auto Err = MemoryPool->allocate(Size, &Ptr))
return std::move(Err);

assert(Ptr && "Invalid pointer");

// Get a list of agents that can access this memory pool.
Expand All @@ -3830,14 +3827,13 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
[&](hsa_agent_t Agent) { return MemoryPool->canAccess(Agent); });

// Allow all valid kernel agents to access the allocation.
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents)) {
REPORT("%s\n", toString(std::move(Err)).data());
return nullptr;
}
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents))
return std::move(Err);
return Ptr;
}

void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
Expected<void *> AMDGPUDeviceTy::allocate(size_t Size, void *,
TargetAllocTy Kind) {
if (Size == 0)
return nullptr;

Expand All @@ -3856,17 +3852,14 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
break;
}

if (!MemoryPool) {
REPORT("No memory pool for the specified allocation kind\n");
return nullptr;
}
if (!MemoryPool)
return Plugin::error(ErrorCode::UNSUPPORTED,
"no memory pool for the specified allocation kind");

// Allocate from the corresponding memory pool.
void *Alloc = nullptr;
if (Error Err = MemoryPool->allocate(Size, &Alloc)) {
REPORT("%s\n", toString(std::move(Err)).data());
return nullptr;
}
if (auto Err = MemoryPool->allocate(Size, &Alloc))
return std::move(Err);

if (Alloc) {
// Get a list of agents that can access this memory pool. Inherently
Expand All @@ -3879,10 +3872,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
});

// Enable all valid kernel agents to access the buffer.
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents)) {
REPORT("%s\n", toString(std::move(Err)).data());
return nullptr;
}
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents))
return std::move(Err);
}

return Alloc;
Expand Down
66 changes: 45 additions & 21 deletions offload/plugins-nextgen/common/include/MemoryManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,24 @@
#include "Shared/Utils.h"
#include "omptarget.h"

#include "llvm/Support/Error.h"

namespace llvm {

/// Base class of per-device allocator.
class DeviceAllocatorTy {
public:
virtual ~DeviceAllocatorTy() = default;

/// Allocate a memory of size \p Size . \p HstPtr is used to assist the
/// allocation.
virtual void *allocate(size_t Size, void *HstPtr,
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
virtual Expected<void *>
allocate(size_t Size, void *HstPtr,
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;

/// Delete the pointer \p TgtPtr on the device
virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
virtual Error free(void *TgtPtr,
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
};

/// Class of memory manager. The memory manager is per-device by using
Expand Down Expand Up @@ -134,17 +140,17 @@ class MemoryManagerTy {
size_t SizeThreshold = 1U << 13;

/// Request memory from target device
void *allocateOnDevice(size_t Size, void *HstPtr) const {
Expected<void *> allocateOnDevice(size_t Size, void *HstPtr) const {
return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE);
}

/// Deallocate data on device
int deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
Error deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }

/// This function is called when it tries to allocate memory on device but the
/// device returns out of memory. It will first free all memory in the
/// FreeList and try to allocate again.
void *freeAndAllocate(size_t Size, void *HstPtr) {
Expected<void *> freeAndAllocate(size_t Size, void *HstPtr) {
std::vector<void *> RemoveList;

// Deallocate all memory in FreeList
Expand All @@ -154,7 +160,8 @@ class MemoryManagerTy {
if (List.empty())
continue;
for (const NodeTy &N : List) {
deleteOnDevice(N.Ptr);
if (auto Err = deleteOnDevice(N.Ptr))
return Err;
RemoveList.push_back(N.Ptr);
}
FreeLists[I].clear();
Expand All @@ -175,14 +182,22 @@ class MemoryManagerTy {
/// allocate directly on the device. If a \p nullptr is returned, it might
/// be because the device is OOM. In that case, it will free all unused
/// memory and then try again.
void *allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr) {
void *TgtPtr = allocateOnDevice(Size, HstPtr);
Expected<void *> allocateOrFreeAndAllocateOnDevice(size_t Size,
void *HstPtr) {
auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr);
if (!TgtPtrOrErr)
return TgtPtrOrErr.takeError();

void *TgtPtr = *TgtPtrOrErr;
// We cannot get memory from the device. It might be due to OOM. Let's
// free all memory in FreeLists and try again.
if (TgtPtr == nullptr) {
DP("Failed to get memory on device. Free all memory in FreeLists and "
"try again.\n");
TgtPtr = freeAndAllocate(Size, HstPtr);
TgtPtrOrErr = freeAndAllocate(Size, HstPtr);
if (!TgtPtrOrErr)
return TgtPtrOrErr.takeError();
TgtPtr = *TgtPtrOrErr;
}

if (TgtPtr == nullptr)
Expand All @@ -204,16 +219,17 @@ class MemoryManagerTy {

/// Destructor
~MemoryManagerTy() {
for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end();
++Itr) {
assert(Itr->second.Ptr && "nullptr in map table");
deleteOnDevice(Itr->second.Ptr);
for (auto &PtrToNode : PtrToNodeTable) {
assert(PtrToNode.second.Ptr && "nullptr in map table");
if (auto Err = deleteOnDevice(PtrToNode.second.Ptr))
REPORT("Failure to delete memory: %s\n",
toString(std::move(Err)).data());
}
}

/// Allocate memory of size \p Size from target device. \p HstPtr is used to
/// assist the allocation.
void *allocate(size_t Size, void *HstPtr) {
Expected<void *> allocate(size_t Size, void *HstPtr) {
// If the size is zero, we will not bother the target device. Just return
// nullptr directly.
if (Size == 0)
Expand All @@ -228,11 +244,14 @@ class MemoryManagerTy {
DP("%zu is greater than the threshold %zu. Allocate it directly from "
"device\n",
Size, SizeThreshold);
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
if (!TgtPtrOrErr)
return TgtPtrOrErr.takeError();

DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
DP("Got target pointer " DPxMOD ". Return directly.\n",
DPxPTR(*TgtPtrOrErr));

return TgtPtr;
return *TgtPtrOrErr;
}

NodeTy *NodePtr = nullptr;
Expand Down Expand Up @@ -260,8 +279,11 @@ class MemoryManagerTy {
if (NodePtr == nullptr) {
DP("Cannot find a node in the FreeLists. Allocate on device.\n");
// Allocate one on device
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
if (!TgtPtrOrErr)
return TgtPtrOrErr.takeError();

void *TgtPtr = *TgtPtrOrErr;
if (TgtPtr == nullptr)
return nullptr;

Expand All @@ -282,7 +304,7 @@ class MemoryManagerTy {
}

/// Deallocate memory pointed by \p TgtPtr
int free(void *TgtPtr) {
Error free(void *TgtPtr) {
DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));

NodeTy *P = nullptr;
Expand Down Expand Up @@ -314,7 +336,7 @@ class MemoryManagerTy {
FreeLists[B].insert(*P);
}

return OFFLOAD_SUCCESS;
return Error::success();
}

/// Get the size threshold from the environment variable
Expand Down Expand Up @@ -344,4 +366,6 @@ class MemoryManagerTy {
constexpr const size_t MemoryManagerTy::BucketSize[];
constexpr const int MemoryManagerTy::NumBuckets;

} // namespace llvm

#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_H
Loading
Loading