[Offload] Use Error for allocating/deallocating in plugins #160811

kevinsala · 2025-09-26T05:08:49Z

No description provided.

llvmbot · 2025-09-26T05:09:23Z

@llvm/pr-subscribers-backend-amdgpu

Author: Kevin Sala Penades (kevinsala)

Changes

Patch is 23.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160811.diff

6 Files Affected:

(modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+34-43)
(modified) offload/plugins-nextgen/common/include/MemoryManager.h (+45-21)
(modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+40-26)
(modified) offload/plugins-nextgen/common/src/RPC.cpp (+18-6)
(modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+18-27)
(modified) offload/plugins-nextgen/host/src/rtl.cpp (+3-3)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 64470e9fabf46..7b834ee346e5d 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -423,7 +423,11 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
     assert(MemoryManager && "Invalid memory manager");
     assert(PtrStorage && "Invalid pointer storage");
 
-    *PtrStorage = MemoryManager->allocate(Size, nullptr);
+    auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr);
+    if (!PtrStorageOrErr)
+      return PtrStorageOrErr.takeError();
+
+    *PtrStorage = *PtrStorageOrErr;
     if (Size && *PtrStorage == nullptr)
       return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                            "failure to allocate from AMDGPU memory manager");
@@ -443,15 +447,12 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
 private:
   /// Allocation callback that will be called once the memory manager does not
   /// have more previously allocated buffers.
-  void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
+  Expected<void *> allocate(size_t Size, void *HstPtr,
+                            TargetAllocTy Kind) override;
 
   /// Deallocation callback that will be called by the memory manager.
-  int free(void *TgtPtr, TargetAllocTy Kind) override {
-    if (auto Err = MemoryPool->deallocate(TgtPtr)) {
-      consumeError(std::move(Err));
-      return OFFLOAD_FAIL;
-    }
-    return OFFLOAD_SUCCESS;
+  Error free(void *TgtPtr, TargetAllocTy Kind) override {
+    return MemoryPool->deallocate(TgtPtr);
   }
 
   /// The underlying plugin that owns this memory manager.
@@ -2339,12 +2340,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// Allocate memory on the device or related to the device.
-  void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
+  Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override;
 
   /// Deallocate memory on the device or related to the device.
-  int free(void *TgtPtr, TargetAllocTy Kind) override {
+  Error free(void *TgtPtr, TargetAllocTy Kind) override {
     if (TgtPtr == nullptr)
-      return OFFLOAD_SUCCESS;
+      return Plugin::success();
 
     AMDGPUMemoryPoolTy *MemoryPool = nullptr;
     switch (Kind) {
@@ -2360,17 +2361,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       break;
     }
 
-    if (!MemoryPool) {
-      REPORT("No memory pool for the specified allocation kind\n");
-      return OFFLOAD_FAIL;
-    }
+    if (!MemoryPool)
+      return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
+                           "no memory pool for the specified allocation kind");
 
-    if (Error Err = MemoryPool->deallocate(TgtPtr)) {
-      REPORT("%s\n", toString(std::move(Err)).data());
-      return OFFLOAD_FAIL;
-    }
+    if (auto Err = MemoryPool->deallocate(TgtPtr))
+      return Err;
 
-    return OFFLOAD_SUCCESS;
+    return Plugin::success();
   }
 
   /// Synchronize current thread with the pending operations on the async info.
@@ -3813,14 +3811,13 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
   return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
 }
 
-void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
-                                      TargetAllocTy Kind) {
+Expected<void *> AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
+                                                 TargetAllocTy Kind) {
   // Allocate memory from the pool.
   void *Ptr = nullptr;
-  if (auto Err = MemoryPool->allocate(Size, &Ptr)) {
-    consumeError(std::move(Err));
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->allocate(Size, &Ptr))
+    return std::move(Err);
+
   assert(Ptr && "Invalid pointer");
 
   // Get a list of agents that can access this memory pool.
@@ -3830,14 +3827,13 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
       [&](hsa_agent_t Agent) { return MemoryPool->canAccess(Agent); });
 
   // Allow all valid kernel agents to access the allocation.
-  if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents)) {
-    REPORT("%s\n", toString(std::move(Err)).data());
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents))
+    return std::move(Err);
   return Ptr;
 }
 
-void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
+Expected<void *> AMDGPUDeviceTy::allocate(size_t Size, void *,
+                                          TargetAllocTy Kind) {
   if (Size == 0)
     return nullptr;
 
@@ -3856,17 +3852,14 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
     break;
   }
 
-  if (!MemoryPool) {
-    REPORT("No memory pool for the specified allocation kind\n");
-    return nullptr;
-  }
+  if (!MemoryPool)
+    return Plugin::error(ErrorCode::UNSUPPORTED,
+                         "no memory pool for the specified allocation kind");
 
   // Allocate from the corresponding memory pool.
   void *Alloc = nullptr;
-  if (Error Err = MemoryPool->allocate(Size, &Alloc)) {
-    REPORT("%s\n", toString(std::move(Err)).data());
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->allocate(Size, &Alloc))
+    return std::move(Err);
 
   if (Alloc) {
     // Get a list of agents that can access this memory pool. Inherently
@@ -3879,10 +3872,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
                   });
 
     // Enable all valid kernel agents to access the buffer.
-    if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents)) {
-      REPORT("%s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
+    if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents))
+      return std::move(Err);
   }
 
   return Alloc;
diff --git a/offload/plugins-nextgen/common/include/MemoryManager.h b/offload/plugins-nextgen/common/include/MemoryManager.h
index a4f6e628c403a..8f6c1adcdaa58 100644
--- a/offload/plugins-nextgen/common/include/MemoryManager.h
+++ b/offload/plugins-nextgen/common/include/MemoryManager.h
@@ -25,6 +25,10 @@
 #include "Shared/Utils.h"
 #include "omptarget.h"
 
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
 /// Base class of per-device allocator.
 class DeviceAllocatorTy {
 public:
@@ -32,11 +36,13 @@ class DeviceAllocatorTy {
 
   /// Allocate a memory of size \p Size . \p HstPtr is used to assist the
   /// allocation.
-  virtual void *allocate(size_t Size, void *HstPtr,
-                         TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
+  virtual Expected<void *>
+  allocate(size_t Size, void *HstPtr,
+           TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 
   /// Delete the pointer \p TgtPtr on the device
-  virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
+  virtual Error free(void *TgtPtr,
+                     TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 };
 
 /// Class of memory manager. The memory manager is per-device by using
@@ -134,17 +140,17 @@ class MemoryManagerTy {
   size_t SizeThreshold = 1U << 13;
 
   /// Request memory from target device
-  void *allocateOnDevice(size_t Size, void *HstPtr) const {
+  Expected<void *> allocateOnDevice(size_t Size, void *HstPtr) const {
     return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE);
   }
 
   /// Deallocate data on device
-  int deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
+  Error deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
 
   /// This function is called when it tries to allocate memory on device but the
   /// device returns out of memory. It will first free all memory in the
   /// FreeList and try to allocate again.
-  void *freeAndAllocate(size_t Size, void *HstPtr) {
+  Expected<void *> freeAndAllocate(size_t Size, void *HstPtr) {
     std::vector<void *> RemoveList;
 
     // Deallocate all memory in FreeList
@@ -154,7 +160,8 @@ class MemoryManagerTy {
       if (List.empty())
         continue;
       for (const NodeTy &N : List) {
-        deleteOnDevice(N.Ptr);
+        if (auto Err = deleteOnDevice(N.Ptr))
+          return Err;
         RemoveList.push_back(N.Ptr);
       }
       FreeLists[I].clear();
@@ -175,14 +182,22 @@ class MemoryManagerTy {
   /// allocate directly on the device. If a \p nullptr is returned, it might
   /// be because the device is OOM. In that case, it will free all unused
   /// memory and then try again.
-  void *allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr) {
-    void *TgtPtr = allocateOnDevice(Size, HstPtr);
+  Expected<void *> allocateOrFreeAndAllocateOnDevice(size_t Size,
+                                                     void *HstPtr) {
+    auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr);
+    if (!TgtPtrOrErr)
+      return TgtPtrOrErr.takeError();
+
+    void *TgtPtr = *TgtPtrOrErr;
     // We cannot get memory from the device. It might be due to OOM. Let's
     // free all memory in FreeLists and try again.
     if (TgtPtr == nullptr) {
       DP("Failed to get memory on device. Free all memory in FreeLists and "
          "try again.\n");
-      TgtPtr = freeAndAllocate(Size, HstPtr);
+      TgtPtrOrErr = freeAndAllocate(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
+      TgtPtr = *TgtPtrOrErr;
     }
 
     if (TgtPtr == nullptr)
@@ -204,16 +219,17 @@ class MemoryManagerTy {
 
   /// Destructor
   ~MemoryManagerTy() {
-    for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end();
-         ++Itr) {
-      assert(Itr->second.Ptr && "nullptr in map table");
-      deleteOnDevice(Itr->second.Ptr);
+    for (auto &PtrToNode : PtrToNodeTable) {
+      assert(PtrToNode.second.Ptr && "nullptr in map table");
+      if (auto Err = deleteOnDevice(PtrToNode.second.Ptr))
+        REPORT("Failure to delete memory: %s\n",
+               toString(std::move(Err)).data());
     }
   }
 
   /// Allocate memory of size \p Size from target device. \p HstPtr is used to
   /// assist the allocation.
-  void *allocate(size_t Size, void *HstPtr) {
+  Expected<void *> allocate(size_t Size, void *HstPtr) {
     // If the size is zero, we will not bother the target device. Just return
     // nullptr directly.
     if (Size == 0)
@@ -228,11 +244,14 @@ class MemoryManagerTy {
       DP("%zu is greater than the threshold %zu. Allocate it directly from "
          "device\n",
          Size, SizeThreshold);
-      void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
 
-      DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
+      DP("Got target pointer " DPxMOD ". Return directly.\n",
+         DPxPTR(*TgtPtrOrErr));
 
-      return TgtPtr;
+      return *TgtPtrOrErr;
     }
 
     NodeTy *NodePtr = nullptr;
@@ -260,8 +279,11 @@ class MemoryManagerTy {
     if (NodePtr == nullptr) {
       DP("Cannot find a node in the FreeLists. Allocate on device.\n");
       // Allocate one on device
-      void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
 
+      void *TgtPtr = *TgtPtrOrErr;
       if (TgtPtr == nullptr)
         return nullptr;
 
@@ -282,7 +304,7 @@ class MemoryManagerTy {
   }
 
   /// Deallocate memory pointed by \p TgtPtr
-  int free(void *TgtPtr) {
+  Error free(void *TgtPtr) {
     DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
 
     NodeTy *P = nullptr;
@@ -314,7 +336,7 @@ class MemoryManagerTy {
       FreeLists[B].insert(*P);
     }
 
-    return OFFLOAD_SUCCESS;
+    return Error::success();
   }
 
   /// Get the size threshold from the environment variable
@@ -344,4 +366,6 @@ class MemoryManagerTy {
 constexpr const size_t MemoryManagerTy::BucketSize[];
 constexpr const int MemoryManagerTy::NumBuckets;
 
+} // namespace llvm
+
 #endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_H
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 7d05dd25dbf75..15b6b9866e5a2 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -73,11 +73,17 @@ struct RecordReplayTy {
   };
   llvm::SmallVector<GlobalEntry> GlobalEntries{};
 
-  void *suggestAddress(uint64_t MaxMemoryAllocation) {
+  Expected<void *> suggestAddress(uint64_t MaxMemoryAllocation) {
     // Get a valid pointer address for this system
-    void *Addr =
+    auto AddrOrErr =
         Device->allocate(1024, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
-    Device->free(Addr);
+    if (!AddrOrErr)
+      return AddrOrErr.takeError();
+
+    void *Addr = *AddrOrErr;
+    if (auto Err = Device->free(Addr))
+      return std::move(Err);
+
     // Align Address to MaxMemoryAllocation
     Addr = (void *)utils::alignPtr((Addr), MaxMemoryAllocation);
     return Addr;
@@ -86,8 +92,12 @@ struct RecordReplayTy {
   Error preAllocateVAMemory(uint64_t MaxMemoryAllocation, void *VAddr) {
     size_t ASize = MaxMemoryAllocation;
 
-    if (!VAddr && isRecording())
-      VAddr = suggestAddress(MaxMemoryAllocation);
+    if (!VAddr && isRecording()) {
+      auto VAddrOrErr = suggestAddress(MaxMemoryAllocation);
+      if (!VAddrOrErr)
+        return VAddrOrErr.takeError();
+      VAddr = *VAddrOrErr;
+    }
 
     DP("Request %ld bytes allocated at %p\n", MaxMemoryAllocation, VAddr);
 
@@ -117,8 +127,11 @@ struct RecordReplayTy {
     constexpr size_t STEP = 1024 * 1024 * 1024ULL;
     MemoryStart = nullptr;
     for (TotalSize = MAX_MEMORY_ALLOCATION; TotalSize > 0; TotalSize -= STEP) {
-      MemoryStart =
+      auto MemoryStartOrErr =
           Device->allocate(TotalSize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
+      if (!MemoryStartOrErr)
+        return MemoryStartOrErr.takeError();
+      MemoryStart = *MemoryStartOrErr;
       if (MemoryStart)
         break;
     }
@@ -352,13 +365,15 @@ struct RecordReplayTy {
     return Plugin::success();
   }
 
-  void deinit() {
+  Error deinit() {
     if (UsedVAMap) {
       if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize))
-        report_fatal_error("Error on releasing virtual memory space");
+        return Err;
     } else {
-      Device->free(MemoryStart);
+      if (auto Err = Device->free(MemoryStart))
+        return Err;
     }
+    return Plugin::success();
   }
 };
 } // namespace llvm::omp::target::plugin
@@ -838,7 +853,8 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
 
   RecordReplayTy &RecordReplay = Plugin.getRecordReplay();
   if (RecordReplay.isRecordingOrReplaying())
-    RecordReplay.deinit();
+    if (auto Err = RecordReplay.deinit())
+      return Err;
 
   if (RPCServer)
     if (auto Err = RPCServer->deinitDevice(*this))
@@ -1297,7 +1313,10 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
   case TARGET_ALLOC_DEFAULT:
   case TARGET_ALLOC_DEVICE:
     if (MemoryManager) {
-      Alloc = MemoryManager->allocate(Size, HostPtr);
+      auto AllocOrErr = MemoryManager->allocate(Size, HostPtr);
+      if (!AllocOrErr)
+        return AllocOrErr.takeError();
+      Alloc = *AllocOrErr;
       if (!Alloc)
         return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                              "failed to allocate from memory manager");
@@ -1305,12 +1324,16 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
     }
     [[fallthrough]];
   case TARGET_ALLOC_HOST:
-  case TARGET_ALLOC_SHARED:
-    Alloc = allocate(Size, HostPtr, Kind);
+  case TARGET_ALLOC_SHARED: {
+    auto AllocOrErr = allocate(Size, HostPtr, Kind);
+    if (!AllocOrErr)
+      return AllocOrErr.takeError();
+    Alloc = *AllocOrErr;
     if (!Alloc)
       return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                            "failed to allocate from device allocator");
   }
+  }
 
   // Report error if the memory manager or the device allocator did not return
   // any memory buffer.
@@ -1382,28 +1405,19 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
 #undef DEALLOCATION_ERROR
   }
 
-  int Res;
   switch (Kind) {
   case TARGET_ALLOC_DEFAULT:
   case TARGET_ALLOC_DEVICE:
     if (MemoryManager) {
-      Res = MemoryManager->free(TgtPtr);
-      if (Res)
-        return Plugin::error(
-            ErrorCode::OUT_OF_RESOURCES,
-            "failure to deallocate device pointer %p via memory manager",
-            TgtPtr);
+      if (auto Err = MemoryManager->free(TgtPtr))
+        return Err;
       break;
     }
     [[fallthrough]];
   case TARGET_ALLOC_HOST:
   case TARGET_ALLOC_SHARED:
-    Res = free(TgtPtr, Kind);
-    if (Res)
-      return Plugin::error(
-          ErrorCode::UNKNOWN,
-          "failure to deallocate device pointer %p via device deallocator",
-          TgtPtr);
+    if (auto Err = free(TgtPtr, Kind))
+      return Err;
   }
 
   // Unregister deallocated pinned memory buffer if the type is host memory.
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 17d69b49b3b7e..e19f2ef94de6e 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -28,15 +28,22 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
   switch (Port.get_opcode()) {
   case LIBC_MALLOC: {
     Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
-      Buffer->data[0] = reinterpret_cast<uintptr_t>(
-          Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
+      auto PtrOrErr =
+          Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE);
+      void *Ptr = nullptr;
+      if (!PtrOrErr)
+        llvm::consumeError(PtrOrErr.takeError());
+      else
+        Ptr = *PtrOrErr;
+      Buffer->data[0] = reinterpret_cast<uintptr_t>(Ptr);
     });
     break;
   }
   case LIBC_FREE: {
     Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
-      Device.free(reinterpret_cast<void *>(Buffer->data[0]),
-                  TARGET_ALLOC_DEVICE);
+      if (auto Err = Device.free(reinterpret_cast<void *>(Buffer->data[0]),
+                                 TARGET_ALLOC_DEVICE))
+        llvm::consumeError(std::move(Err));
     });
     break;
   }
@@ -171,9 +178,13 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
                               plugin::DeviceImageTy &Image) {
   uint64_t NumPorts =
       std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
-  void *RPCBuffer = Device.allocate(
+  auto RPCBufferOrErr = Device.allocate(
       rpc::Server::allocation_size(Device.getWarpSize(), NumPorts), nullptr,
       TARGET_ALLOC_HOST);
+  if (!RPCBufferOrErr)
+    return RPCBufferOrErr.takeError();
+
+  void *RPCBuffer = *RPCBufferOrErr;
   if (!RPCBuffer)
     return plugin::Plugin::error(
         error::ErrorCode::UNKNOWN,
@@ -198,7 +209,8 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
 
 Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) {
   std::lock_guard<decltype(BufferMutex)> Lock(BufferMutex);
-  Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST);
+  if (auto Err = Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST))
+    return Err;
   Buffers[Device.getDeviceId()] = nullptr;
   Devices[Device.getDeviceId()] = nullptr;
   return Error::success();
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b2f840113cff3..b30c651223cad 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -561,14 +561,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// Allocate memory on the device or related to the device.
-  void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
+  Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override {
     if (Size == 0)
       return nullptr;
 
-    if (auto Err = setContext()) {
-      REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
+    if (auto Err = setContext())
+      return...
[truncated]

llvmbot · 2025-09-26T05:09:24Z

@llvm/pr-subscribers-offload

Author: Kevin Sala Penades (kevinsala)

Changes

Patch is 23.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160811.diff

6 Files Affected:

(modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+34-43)
(modified) offload/plugins-nextgen/common/include/MemoryManager.h (+45-21)
(modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+40-26)
(modified) offload/plugins-nextgen/common/src/RPC.cpp (+18-6)
(modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+18-27)
(modified) offload/plugins-nextgen/host/src/rtl.cpp (+3-3)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 64470e9fabf46..7b834ee346e5d 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -423,7 +423,11 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
     assert(MemoryManager && "Invalid memory manager");
     assert(PtrStorage && "Invalid pointer storage");
 
-    *PtrStorage = MemoryManager->allocate(Size, nullptr);
+    auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr);
+    if (!PtrStorageOrErr)
+      return PtrStorageOrErr.takeError();
+
+    *PtrStorage = *PtrStorageOrErr;
     if (Size && *PtrStorage == nullptr)
       return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                            "failure to allocate from AMDGPU memory manager");
@@ -443,15 +447,12 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
 private:
   /// Allocation callback that will be called once the memory manager does not
   /// have more previously allocated buffers.
-  void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
+  Expected<void *> allocate(size_t Size, void *HstPtr,
+                            TargetAllocTy Kind) override;
 
   /// Deallocation callback that will be called by the memory manager.
-  int free(void *TgtPtr, TargetAllocTy Kind) override {
-    if (auto Err = MemoryPool->deallocate(TgtPtr)) {
-      consumeError(std::move(Err));
-      return OFFLOAD_FAIL;
-    }
-    return OFFLOAD_SUCCESS;
+  Error free(void *TgtPtr, TargetAllocTy Kind) override {
+    return MemoryPool->deallocate(TgtPtr);
   }
 
   /// The underlying plugin that owns this memory manager.
@@ -2339,12 +2340,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// Allocate memory on the device or related to the device.
-  void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
+  Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override;
 
   /// Deallocate memory on the device or related to the device.
-  int free(void *TgtPtr, TargetAllocTy Kind) override {
+  Error free(void *TgtPtr, TargetAllocTy Kind) override {
     if (TgtPtr == nullptr)
-      return OFFLOAD_SUCCESS;
+      return Plugin::success();
 
     AMDGPUMemoryPoolTy *MemoryPool = nullptr;
     switch (Kind) {
@@ -2360,17 +2361,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       break;
     }
 
-    if (!MemoryPool) {
-      REPORT("No memory pool for the specified allocation kind\n");
-      return OFFLOAD_FAIL;
-    }
+    if (!MemoryPool)
+      return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
+                           "no memory pool for the specified allocation kind");
 
-    if (Error Err = MemoryPool->deallocate(TgtPtr)) {
-      REPORT("%s\n", toString(std::move(Err)).data());
-      return OFFLOAD_FAIL;
-    }
+    if (auto Err = MemoryPool->deallocate(TgtPtr))
+      return Err;
 
-    return OFFLOAD_SUCCESS;
+    return Plugin::success();
   }
 
   /// Synchronize current thread with the pending operations on the async info.
@@ -3813,14 +3811,13 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
   return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
 }
 
-void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
-                                      TargetAllocTy Kind) {
+Expected<void *> AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
+                                                 TargetAllocTy Kind) {
   // Allocate memory from the pool.
   void *Ptr = nullptr;
-  if (auto Err = MemoryPool->allocate(Size, &Ptr)) {
-    consumeError(std::move(Err));
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->allocate(Size, &Ptr))
+    return std::move(Err);
+
   assert(Ptr && "Invalid pointer");
 
   // Get a list of agents that can access this memory pool.
@@ -3830,14 +3827,13 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
       [&](hsa_agent_t Agent) { return MemoryPool->canAccess(Agent); });
 
   // Allow all valid kernel agents to access the allocation.
-  if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents)) {
-    REPORT("%s\n", toString(std::move(Err)).data());
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents))
+    return std::move(Err);
   return Ptr;
 }
 
-void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
+Expected<void *> AMDGPUDeviceTy::allocate(size_t Size, void *,
+                                          TargetAllocTy Kind) {
   if (Size == 0)
     return nullptr;
 
@@ -3856,17 +3852,14 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
     break;
   }
 
-  if (!MemoryPool) {
-    REPORT("No memory pool for the specified allocation kind\n");
-    return nullptr;
-  }
+  if (!MemoryPool)
+    return Plugin::error(ErrorCode::UNSUPPORTED,
+                         "no memory pool for the specified allocation kind");
 
   // Allocate from the corresponding memory pool.
   void *Alloc = nullptr;
-  if (Error Err = MemoryPool->allocate(Size, &Alloc)) {
-    REPORT("%s\n", toString(std::move(Err)).data());
-    return nullptr;
-  }
+  if (auto Err = MemoryPool->allocate(Size, &Alloc))
+    return std::move(Err);
 
   if (Alloc) {
     // Get a list of agents that can access this memory pool. Inherently
@@ -3879,10 +3872,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
                   });
 
     // Enable all valid kernel agents to access the buffer.
-    if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents)) {
-      REPORT("%s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
+    if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents))
+      return std::move(Err);
   }
 
   return Alloc;
diff --git a/offload/plugins-nextgen/common/include/MemoryManager.h b/offload/plugins-nextgen/common/include/MemoryManager.h
index a4f6e628c403a..8f6c1adcdaa58 100644
--- a/offload/plugins-nextgen/common/include/MemoryManager.h
+++ b/offload/plugins-nextgen/common/include/MemoryManager.h
@@ -25,6 +25,10 @@
 #include "Shared/Utils.h"
 #include "omptarget.h"
 
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
 /// Base class of per-device allocator.
 class DeviceAllocatorTy {
 public:
@@ -32,11 +36,13 @@ class DeviceAllocatorTy {
 
   /// Allocate a memory of size \p Size . \p HstPtr is used to assist the
   /// allocation.
-  virtual void *allocate(size_t Size, void *HstPtr,
-                         TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
+  virtual Expected<void *>
+  allocate(size_t Size, void *HstPtr,
+           TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 
   /// Delete the pointer \p TgtPtr on the device
-  virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
+  virtual Error free(void *TgtPtr,
+                     TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 };
 
 /// Class of memory manager. The memory manager is per-device by using
@@ -134,17 +140,17 @@ class MemoryManagerTy {
   size_t SizeThreshold = 1U << 13;
 
   /// Request memory from target device
-  void *allocateOnDevice(size_t Size, void *HstPtr) const {
+  Expected<void *> allocateOnDevice(size_t Size, void *HstPtr) const {
     return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE);
   }
 
   /// Deallocate data on device
-  int deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
+  Error deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
 
   /// This function is called when it tries to allocate memory on device but the
   /// device returns out of memory. It will first free all memory in the
   /// FreeList and try to allocate again.
-  void *freeAndAllocate(size_t Size, void *HstPtr) {
+  Expected<void *> freeAndAllocate(size_t Size, void *HstPtr) {
     std::vector<void *> RemoveList;
 
     // Deallocate all memory in FreeList
@@ -154,7 +160,8 @@ class MemoryManagerTy {
       if (List.empty())
         continue;
       for (const NodeTy &N : List) {
-        deleteOnDevice(N.Ptr);
+        if (auto Err = deleteOnDevice(N.Ptr))
+          return Err;
         RemoveList.push_back(N.Ptr);
       }
       FreeLists[I].clear();
@@ -175,14 +182,22 @@ class MemoryManagerTy {
   /// allocate directly on the device. If a \p nullptr is returned, it might
   /// be because the device is OOM. In that case, it will free all unused
   /// memory and then try again.
-  void *allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr) {
-    void *TgtPtr = allocateOnDevice(Size, HstPtr);
+  Expected<void *> allocateOrFreeAndAllocateOnDevice(size_t Size,
+                                                     void *HstPtr) {
+    auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr);
+    if (!TgtPtrOrErr)
+      return TgtPtrOrErr.takeError();
+
+    void *TgtPtr = *TgtPtrOrErr;
     // We cannot get memory from the device. It might be due to OOM. Let's
     // free all memory in FreeLists and try again.
     if (TgtPtr == nullptr) {
       DP("Failed to get memory on device. Free all memory in FreeLists and "
          "try again.\n");
-      TgtPtr = freeAndAllocate(Size, HstPtr);
+      TgtPtrOrErr = freeAndAllocate(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
+      TgtPtr = *TgtPtrOrErr;
     }
 
     if (TgtPtr == nullptr)
@@ -204,16 +219,17 @@ class MemoryManagerTy {
 
   /// Destructor
   ~MemoryManagerTy() {
-    for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end();
-         ++Itr) {
-      assert(Itr->second.Ptr && "nullptr in map table");
-      deleteOnDevice(Itr->second.Ptr);
+    for (auto &PtrToNode : PtrToNodeTable) {
+      assert(PtrToNode.second.Ptr && "nullptr in map table");
+      if (auto Err = deleteOnDevice(PtrToNode.second.Ptr))
+        REPORT("Failure to delete memory: %s\n",
+               toString(std::move(Err)).data());
     }
   }
 
   /// Allocate memory of size \p Size from target device. \p HstPtr is used to
   /// assist the allocation.
-  void *allocate(size_t Size, void *HstPtr) {
+  Expected<void *> allocate(size_t Size, void *HstPtr) {
     // If the size is zero, we will not bother the target device. Just return
     // nullptr directly.
     if (Size == 0)
@@ -228,11 +244,14 @@ class MemoryManagerTy {
       DP("%zu is greater than the threshold %zu. Allocate it directly from "
          "device\n",
          Size, SizeThreshold);
-      void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
 
-      DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
+      DP("Got target pointer " DPxMOD ". Return directly.\n",
+         DPxPTR(*TgtPtrOrErr));
 
-      return TgtPtr;
+      return *TgtPtrOrErr;
     }
 
     NodeTy *NodePtr = nullptr;
@@ -260,8 +279,11 @@ class MemoryManagerTy {
     if (NodePtr == nullptr) {
       DP("Cannot find a node in the FreeLists. Allocate on device.\n");
       // Allocate one on device
-      void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
+      if (!TgtPtrOrErr)
+        return TgtPtrOrErr.takeError();
 
+      void *TgtPtr = *TgtPtrOrErr;
       if (TgtPtr == nullptr)
         return nullptr;
 
@@ -282,7 +304,7 @@ class MemoryManagerTy {
   }
 
   /// Deallocate memory pointed by \p TgtPtr
-  int free(void *TgtPtr) {
+  Error free(void *TgtPtr) {
     DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
 
     NodeTy *P = nullptr;
@@ -314,7 +336,7 @@ class MemoryManagerTy {
       FreeLists[B].insert(*P);
     }
 
-    return OFFLOAD_SUCCESS;
+    return Error::success();
   }
 
   /// Get the size threshold from the environment variable
@@ -344,4 +366,6 @@ class MemoryManagerTy {
 constexpr const size_t MemoryManagerTy::BucketSize[];
 constexpr const int MemoryManagerTy::NumBuckets;
 
+} // namespace llvm
+
 #endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_H
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 7d05dd25dbf75..15b6b9866e5a2 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -73,11 +73,17 @@ struct RecordReplayTy {
   };
   llvm::SmallVector<GlobalEntry> GlobalEntries{};
 
-  void *suggestAddress(uint64_t MaxMemoryAllocation) {
+  Expected<void *> suggestAddress(uint64_t MaxMemoryAllocation) {
     // Get a valid pointer address for this system
-    void *Addr =
+    auto AddrOrErr =
         Device->allocate(1024, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
-    Device->free(Addr);
+    if (!AddrOrErr)
+      return AddrOrErr.takeError();
+
+    void *Addr = *AddrOrErr;
+    if (auto Err = Device->free(Addr))
+      return std::move(Err);
+
     // Align Address to MaxMemoryAllocation
     Addr = (void *)utils::alignPtr((Addr), MaxMemoryAllocation);
     return Addr;
@@ -86,8 +92,12 @@ struct RecordReplayTy {
   Error preAllocateVAMemory(uint64_t MaxMemoryAllocation, void *VAddr) {
     size_t ASize = MaxMemoryAllocation;
 
-    if (!VAddr && isRecording())
-      VAddr = suggestAddress(MaxMemoryAllocation);
+    if (!VAddr && isRecording()) {
+      auto VAddrOrErr = suggestAddress(MaxMemoryAllocation);
+      if (!VAddrOrErr)
+        return VAddrOrErr.takeError();
+      VAddr = *VAddrOrErr;
+    }
 
     DP("Request %ld bytes allocated at %p\n", MaxMemoryAllocation, VAddr);
 
@@ -117,8 +127,11 @@ struct RecordReplayTy {
     constexpr size_t STEP = 1024 * 1024 * 1024ULL;
     MemoryStart = nullptr;
     for (TotalSize = MAX_MEMORY_ALLOCATION; TotalSize > 0; TotalSize -= STEP) {
-      MemoryStart =
+      auto MemoryStartOrErr =
           Device->allocate(TotalSize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
+      if (!MemoryStartOrErr)
+        return MemoryStartOrErr.takeError();
+      MemoryStart = *MemoryStartOrErr;
       if (MemoryStart)
         break;
     }
@@ -352,13 +365,15 @@ struct RecordReplayTy {
     return Plugin::success();
   }
 
-  void deinit() {
+  Error deinit() {
     if (UsedVAMap) {
       if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize))
-        report_fatal_error("Error on releasing virtual memory space");
+        return Err;
     } else {
-      Device->free(MemoryStart);
+      if (auto Err = Device->free(MemoryStart))
+        return Err;
     }
+    return Plugin::success();
   }
 };
 } // namespace llvm::omp::target::plugin
@@ -838,7 +853,8 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
 
   RecordReplayTy &RecordReplay = Plugin.getRecordReplay();
   if (RecordReplay.isRecordingOrReplaying())
-    RecordReplay.deinit();
+    if (auto Err = RecordReplay.deinit())
+      return Err;
 
   if (RPCServer)
     if (auto Err = RPCServer->deinitDevice(*this))
@@ -1297,7 +1313,10 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
   case TARGET_ALLOC_DEFAULT:
   case TARGET_ALLOC_DEVICE:
     if (MemoryManager) {
-      Alloc = MemoryManager->allocate(Size, HostPtr);
+      auto AllocOrErr = MemoryManager->allocate(Size, HostPtr);
+      if (!AllocOrErr)
+        return AllocOrErr.takeError();
+      Alloc = *AllocOrErr;
       if (!Alloc)
         return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                              "failed to allocate from memory manager");
@@ -1305,12 +1324,16 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
     }
     [[fallthrough]];
   case TARGET_ALLOC_HOST:
-  case TARGET_ALLOC_SHARED:
-    Alloc = allocate(Size, HostPtr, Kind);
+  case TARGET_ALLOC_SHARED: {
+    auto AllocOrErr = allocate(Size, HostPtr, Kind);
+    if (!AllocOrErr)
+      return AllocOrErr.takeError();
+    Alloc = *AllocOrErr;
     if (!Alloc)
       return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                            "failed to allocate from device allocator");
   }
+  }
 
   // Report error if the memory manager or the device allocator did not return
   // any memory buffer.
@@ -1382,28 +1405,19 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
 #undef DEALLOCATION_ERROR
   }
 
-  int Res;
   switch (Kind) {
   case TARGET_ALLOC_DEFAULT:
   case TARGET_ALLOC_DEVICE:
     if (MemoryManager) {
-      Res = MemoryManager->free(TgtPtr);
-      if (Res)
-        return Plugin::error(
-            ErrorCode::OUT_OF_RESOURCES,
-            "failure to deallocate device pointer %p via memory manager",
-            TgtPtr);
+      if (auto Err = MemoryManager->free(TgtPtr))
+        return Err;
       break;
     }
     [[fallthrough]];
   case TARGET_ALLOC_HOST:
   case TARGET_ALLOC_SHARED:
-    Res = free(TgtPtr, Kind);
-    if (Res)
-      return Plugin::error(
-          ErrorCode::UNKNOWN,
-          "failure to deallocate device pointer %p via device deallocator",
-          TgtPtr);
+    if (auto Err = free(TgtPtr, Kind))
+      return Err;
   }
 
   // Unregister deallocated pinned memory buffer if the type is host memory.
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 17d69b49b3b7e..e19f2ef94de6e 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -28,15 +28,22 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
   switch (Port.get_opcode()) {
   case LIBC_MALLOC: {
     Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
-      Buffer->data[0] = reinterpret_cast<uintptr_t>(
-          Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
+      auto PtrOrErr =
+          Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE);
+      void *Ptr = nullptr;
+      if (!PtrOrErr)
+        llvm::consumeError(PtrOrErr.takeError());
+      else
+        Ptr = *PtrOrErr;
+      Buffer->data[0] = reinterpret_cast<uintptr_t>(Ptr);
     });
     break;
   }
   case LIBC_FREE: {
     Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
-      Device.free(reinterpret_cast<void *>(Buffer->data[0]),
-                  TARGET_ALLOC_DEVICE);
+      if (auto Err = Device.free(reinterpret_cast<void *>(Buffer->data[0]),
+                                 TARGET_ALLOC_DEVICE))
+        llvm::consumeError(std::move(Err));
     });
     break;
   }
@@ -171,9 +178,13 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
                               plugin::DeviceImageTy &Image) {
   uint64_t NumPorts =
       std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
-  void *RPCBuffer = Device.allocate(
+  auto RPCBufferOrErr = Device.allocate(
       rpc::Server::allocation_size(Device.getWarpSize(), NumPorts), nullptr,
       TARGET_ALLOC_HOST);
+  if (!RPCBufferOrErr)
+    return RPCBufferOrErr.takeError();
+
+  void *RPCBuffer = *RPCBufferOrErr;
   if (!RPCBuffer)
     return plugin::Plugin::error(
         error::ErrorCode::UNKNOWN,
@@ -198,7 +209,8 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
 
 Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) {
   std::lock_guard<decltype(BufferMutex)> Lock(BufferMutex);
-  Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST);
+  if (auto Err = Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST))
+    return Err;
   Buffers[Device.getDeviceId()] = nullptr;
   Devices[Device.getDeviceId()] = nullptr;
   return Error::success();
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b2f840113cff3..b30c651223cad 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -561,14 +561,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// Allocate memory on the device or related to the device.
-  void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
+  Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override {
     if (Size == 0)
       return nullptr;
 
-    if (auto Err = setContext()) {
-      REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
+    if (auto Err = setContext())
+      return...
[truncated]

koparasy

LGTM

…lvm#160811)" This reverts commit 01d761a.

Co-authored-by: Joseph Huber <huberjn@outlook.com>

[Offload] Use Error for allocating/deallocating in plugins

2867126

kevinsala requested review from RossBrunton, jhuber6 and shiltian September 26, 2025 05:08

llvmbot added backend:AMDGPU offload labels Sep 26, 2025

kevinsala requested a review from koparasy September 26, 2025 05:16

jhuber6 approved these changes Sep 26, 2025

View reviewed changes

koparasy approved these changes Sep 26, 2025

View reviewed changes

Merge branch 'main' into use-errors-for-plugin-alloc

fb676dc

jhuber6 merged commit 01d761a into llvm:main Sep 26, 2025
9 checks passed

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Sep 27, 2025

Revert "[Offload] Use Error for allocating/deallocating in plugins (l…

57ea703

…lvm#160811)" This reverts commit 01d761a.

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Oct 1, 2025

[Offload] Use Error for allocating/deallocating in plugins (llvm#160811)

22fbdef

Co-authored-by: Joseph Huber <huberjn@outlook.com>

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Oct 1, 2025

reland offload use error (llvm#160811) (llvm#4278)

ebc87b7

mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025

[Offload] Use Error for allocating/deallocating in plugins (llvm#160811)

ce14b3c

Co-authored-by: Joseph Huber <huberjn@outlook.com>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Offload] Use Error for allocating/deallocating in plugins #160811

[Offload] Use Error for allocating/deallocating in plugins #160811

Uh oh!

kevinsala commented Sep 26, 2025

Uh oh!

llvmbot commented Sep 26, 2025

Uh oh!

llvmbot commented Sep 26, 2025

Uh oh!

koparasy left a comment

Uh oh!

Uh oh!

Uh oh!

[Offload] Use Error for allocating/deallocating in plugins #160811

[Offload] Use Error for allocating/deallocating in plugins #160811

Uh oh!

Conversation

kevinsala commented Sep 26, 2025

Uh oh!

llvmbot commented Sep 26, 2025

Uh oh!

llvmbot commented Sep 26, 2025

Uh oh!

koparasy left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!