[Offload] Re-allocate overlapping memory #159567
Conversation
@llvm/pr-subscribers-offload

Author: Ross Brunton (RossBrunton)

Changes

If olMemAlloc happens to allocate memory that was already allocated elsewhere (possibly by another device on another platform), it is now thrown away and a new allocation generated. A new `AllocBases` vector is now available, which is an ordered list of allocation start addresses.

Full diff: https://github.com/llvm/llvm-project/pull/159567.diff

3 Files Affected:
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index cc98b672a26a9..debda165d2b23 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -21,6 +21,9 @@ def ol_alloc_type_t : Enum {
def olMemAlloc : Function {
let desc = "Creates a memory allocation on the specified device.";
+ let details = [
+ "All allocations through olMemAlloc regardless of source share a single virtual address range. There is no risk of multiple devices returning equal pointers to different memory."
+ ];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index b5b9b0e83b975..ed969f5f01095 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -182,6 +182,9 @@ namespace offload {
struct AllocInfo {
ol_device_handle_t Device;
ol_alloc_type_t Type;
+ void *Start;
+ // One byte past the end
+ void *End;
};
// Global shared state for liboffload
@@ -200,6 +203,9 @@ struct OffloadContext {
bool ValidationEnabled = true;
DenseMap<void *, AllocInfo> AllocInfoMap{};
std::mutex AllocInfoMapMutex{};
+ // Partitioned list of memory base addresses. Each element in this list is a
+ // key in AllocInfoMap
+ llvm::SmallVector<void *> AllocBases{};
SmallVector<ol_platform_impl_t, 4> Platforms{};
size_t RefCount;
@@ -615,18 +621,58 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
size_t Size, void **AllocationOut) {
- auto Alloc =
- Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
- if (!Alloc)
- return Alloc.takeError();
+ void *OldAlloc = nullptr;
+
+ // Repeat the allocation up to a certain number of times. If it happens to
+ // already be allocated (e.g. by a device from another vendor) throw it away
+ // and try again.
+ for (size_t Count = 0; Count < 10; Count++) {
+ auto NewAlloc = Device->Device->dataAlloc(Size, nullptr,
+ convertOlToPluginAllocTy(Type));
+ if (!NewAlloc)
+ return NewAlloc.takeError();
+
+ if (OldAlloc)
+ if (auto Err = Device->Device->dataDelete(OldAlloc,
+ convertOlToPluginAllocTy(Type)))
+ return Err;
- *AllocationOut = *Alloc;
- {
- std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
- OffloadContext::get().AllocInfoMap.insert_or_assign(
- *Alloc, AllocInfo{Device, Type});
+ void *NewEnd = &reinterpret_cast<char *>(*NewAlloc)[Size];
+ auto &AllocBases = OffloadContext::get().AllocBases;
+ auto &AllocInfoMap = OffloadContext::get().AllocInfoMap;
+ {
+ std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
+
+ // Check that this memory region doesn't overlap another one
+ // That is, the start of this allocation needs to be after another
+ // allocation's end point, and the end of this allocation needs to be
+ // before the next one's start.
+ // `Gap` is the first alloc that ends after the new alloc's start point.
+ auto Gap =
+ std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc,
+ [&](const void *Iter, const void *Val) {
+ return AllocInfoMap.at(Iter).End <= Val;
+ });
+ if (Gap == AllocBases.end() || NewEnd <= AllocInfoMap.at(*Gap).Start) {
+ // Success, no conflict
+ AllocInfoMap.insert_or_assign(
+ *NewAlloc, AllocInfo{Device, Type, *NewAlloc, NewEnd});
+ AllocBases.insert(
+ std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc),
+ *NewAlloc);
+ *AllocationOut = *NewAlloc;
+ return Error::success();
+ }
+
+ // To avoid the next attempt allocating the same memory we just freed, we
+ // hold onto it until we complete the next allocation
+ OldAlloc = *NewAlloc;
+ }
}
- return Error::success();
+
+ // We've tried multiple times, and can't allocate a non-overlapping region.
+ return createOffloadError(ErrorCode::BACKEND_FAILURE,
+ "failed to allocate non-overlapping memory");
}
Error olMemFree_impl(void *Address) {
@@ -642,6 +688,9 @@ Error olMemFree_impl(void *Address) {
Device = AllocInfo.Device;
Type = AllocInfo.Type;
OffloadContext::get().AllocInfoMap.erase(Address);
+
+ auto &Bases = OffloadContext::get().AllocBases;
+ Bases.erase(std::lower_bound(Bases.begin(), Bases.end(), Address));
}
if (auto Res =
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
index 00e428ec2abc7..445262aa0c583 100644
--- a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -34,6 +34,26 @@ TEST_P(olMemAllocTest, SuccessAllocDevice) {
olMemFree(Alloc);
}
+TEST_P(olMemAllocTest, SuccessAllocMany) {
+ std::vector<void *> Allocs;
+ Allocs.reserve(1000);
+
+ constexpr ol_alloc_type_t TYPES[3] = {
+ OL_ALLOC_TYPE_DEVICE, OL_ALLOC_TYPE_MANAGED, OL_ALLOC_TYPE_HOST};
+
+ for (size_t I = 1; I < 1000; I++) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, TYPES[I % 3], 1024 * I, &Alloc));
+ ASSERT_NE(Alloc, nullptr);
+
+ Allocs.push_back(Alloc);
+ }
+
+ for (auto *A : Allocs) {
+ olMemFree(A);
+ }
+}
+
TEST_P(olMemAllocTest, InvalidNullDevice) {
void *Alloc = nullptr;
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
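As a standalone illustration of the overlap check at the heart of this change: a sorted, non-overlapping set of ranges is partitioned by the predicate "ends at or before X", so std::lower_bound can find the single candidate neighbour in O(log n). Below is a minimal self-contained sketch of that technique; RangeSet and its members are hypothetical names, and the real code in OffloadImpl.cpp above additionally handles locking, the retry loop, and the separate AllocInfoMap.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Sorted list of non-overlapping [Start, End) ranges; plays the role of
// AllocBases plus the Start/End fields added to AllocInfo.
struct RangeSet {
  struct Range {
    const char *Start;
    const char *End; // one byte past the end, as in AllocInfo
  };
  std::vector<Range> Ranges; // kept sorted by Start

  // Record [Start, Start + Size) if it overlaps nothing; report success.
  bool tryInsert(const char *Start, std::size_t Size) {
    const char *End = Start + Size;
    // First existing range whose end lies strictly after the new start.
    // Valid for lower_bound because sorted non-overlapping ranges have
    // sorted ends as well.
    auto Gap = std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &R, const char *Val) { return R.End <= Val; });
    // Overlap unless no such range exists or it begins at/after the new end.
    if (Gap != Ranges.end() && Gap->Start < End)
      return false;
    Ranges.insert(Gap, Range{Start, End}); // insertion point keeps order
    return true;
  }
};

int main() {
  RangeSet S;
  char Pool[64];
  assert(S.tryInsert(Pool, 16));      // [0, 16)  - fits
  assert(S.tryInsert(Pool + 32, 16)); // [32, 48) - fits
  assert(!S.tryInsert(Pool + 8, 16)); // [8, 24)  - overlaps [0, 16)
  assert(S.tryInsert(Pool + 16, 16)); // [16, 32) - fills the gap exactly
}

In the PR itself, a failed check corresponds to freeing the new allocation and retrying, while keeping the old pointer alive so the allocator cannot immediately hand the same region back.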
@pbalcer Ping.
If olMemAlloc happens to allocate memory that was already allocated elsewhere (possibly by another device on another platform), it is now thrown away and a new allocation generated. A new `AllocBases` vector is now available, which is an ordered list of allocation start addresses.
Force-pushed from a94173e to 3d0de74
// before the next one's start.
// `Gap` is the first alloc that ends after the new alloc's start point.
auto Gap =
    std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc,
Why do we need to check bounds again? I thought that for the purposes of olMemFree it only mattered that we had something like this, where all that matters is that they're unique:

Map<void *, Platform> Map;
Unless I'm misremembering plans, in the future olGetMemInfo will accept a pointer anywhere into any allocation allocated by liboffload. This means that we need to ensure that no part of the buffers overlap, rather than just the start.
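(Sketch for context: with the sorted AllocBases vector this PR introduces, resolving an interior pointer would be a binary search for the last base at or below it. findAllocContaining is hypothetical, not part of this diff; it borrows the types and locking from OffloadImpl.cpp above.)

// Hypothetical: find the allocation containing Ptr, or null if none does.
AllocInfo *findAllocContaining(void *Ptr) {
  auto &Ctx = OffloadContext::get();
  std::lock_guard<std::mutex> Lock(Ctx.AllocInfoMapMutex);
  // First base strictly greater than Ptr; the candidate owner precedes it.
  auto It = std::upper_bound(Ctx.AllocBases.begin(), Ctx.AllocBases.end(), Ptr);
  if (It == Ctx.AllocBases.begin())
    return nullptr; // Ptr is below every allocation's base
  AllocInfo &Info = Ctx.AllocInfoMap.at(*std::prev(It));
  return Ptr < Info.End ? &Info : nullptr; // inside [Start, End)?
}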
Hm, good point. It definitely makes this a bit more restrictive and expensive if virtual addresses cannot overlap at all, versus just a single pointer passed to free. This at least means that we'll need this range-based table anyway, so I suppose it's fine.
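(For contrast, a free-only design needs nothing beyond the exact-key lookup that olMemFree_impl already performs; a simplified sketch reusing this diff's types, with locking and error handling elided:)

auto &Map = OffloadContext::get().AllocInfoMap;
auto It = Map.find(Address); // Address must be the original base pointer
if (It != Map.end()) {
  AllocInfo Info = It->second; // Device and Type feed dataDelete
  Map.erase(It);
}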