[Offload] Don't create events for empty queues #152304
Conversation
Add a device function to check if a device queue is empty. If liboffload tries to create an event for an empty queue, we create an "empty" event that is already complete. This allows `olCreateEvent`, `olSyncEvent` and `olWaitEvent` to run quickly for empty queues.
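For reference, here is a minimal caller-side sketch of the fast path this enables. It assumes the public entry points mirror the `*_impl` signatures in the diff, that `<OffloadAPI.h>` is the right header, and that a non-null `ol_result_t` signals an error; treat all three as assumptions rather than documented API guarantees.

```cpp
#include <OffloadAPI.h> // Assumed header name for the liboffload C API.

// Hypothetical helper: create, sync and destroy an event on a queue.
// With this patch, all three calls short-circuit when Queue has no queued or
// running work, because the event is created "empty" (EventInfo == nullptr).
static ol_result_t syncViaEvent(ol_queue_handle_t Queue) {
  ol_event_handle_t Event;

  // Empty queue -> no plugin-side event is allocated or recorded.
  if (ol_result_t Res = olCreateEvent(Queue, &Event))
    return Res;

  // Empty event -> returns success immediately, no plugin round trip.
  if (ol_result_t Res = olSyncEvent(Event))
    return Res;

  // Empty event -> the plugin's destroyEvent call is skipped.
  return olDestroyEvent(Event);
}
```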
@llvm/pr-subscribers-backend-amdgpu

Author: Ross Brunton (RossBrunton)

Changes: Add a device function to check if a device queue is empty. If liboffload tries to create an event for an empty queue, we create an "empty" event that is already complete. This allows `olCreateEvent`, `olSyncEvent` and `olWaitEvent` to run quickly for empty queues.

Full diff: https://github.com/llvm/llvm-project/pull/152304.diff

6 Files Affected:
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6486b2b6d13a6..272a12ab59a06 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -72,6 +72,8 @@ struct ol_queue_impl_t {
struct ol_event_impl_t {
ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
: EventInfo(EventInfo), Queue(Queue) {}
+ // EventInfo may be null, in which case the event should be considered always
+ // complete
void *EventInfo;
ol_queue_handle_t Queue;
};
@@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events,
return Plugin::error(ErrorCode::INVALID_NULL_HANDLE,
"olWaitEvents asked to wait on a NULL event");
- // Do nothing if the event is for this queue
- if (Event->Queue == Queue)
+ // Do nothing if the event is for this queue or the event is always complete
+ if (Event->Queue == Queue || !Event->EventInfo)
continue;
if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo))
@@ -548,6 +550,10 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName,
}
Error olSyncEvent_impl(ol_event_handle_t Event) {
+ if (!Event->EventInfo)
+ // Event always complete
+ return Plugin::success();
+
if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo))
return Res;
@@ -555,8 +561,9 @@ Error olSyncEvent_impl(ol_event_handle_t Event) {
}
Error olDestroyEvent_impl(ol_event_handle_t Event) {
- if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
- return Res;
+ if (Event->EventInfo)
+ if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
+ return Res;
return olDestroy(Event);
}
@@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
}
Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
+ auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
+ if (auto Err = Pending.takeError())
+ return Err;
+
*EventOut = new ol_event_impl_t(nullptr, Queue);
+ if (!*Pending)
+ // Queue is empty, don't record an event and consider the event always
+ // complete
+ return Plugin::success();
+
if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo))
return Res;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7bfa89fc9ea6..833c56df21ac3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Event->wait(*Stream);
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
+ if (!Stream)
+ return false;
+
+ auto Query = Stream->query();
+ if (Query)
+ return !*Query;
+ return Query.takeError();
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 8c17a2ee07047..99e7a13ff2b37 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -939,6 +939,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error printInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
+ /// Return true if the device has work that is either queued or currently
+ /// running
+ ///
+ /// Devices which cannot report this information should always return true
+ Expected<bool> hasPendingWork(__tgt_async_info *AsyncInfo);
+ virtual Expected<bool>
+ hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
+
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 94a050b559efe..cce2405cd51c4 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1623,6 +1623,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
return Err;
}
+Expected<bool> GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) {
+ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
+ auto Res = hasPendingWorkImpl(AsyncInfoWrapper);
+ if (auto Err = Res.takeError()) {
+ AsyncInfoWrapper.finalize(Err);
+ return Err;
+ }
+
+ auto Err = Plugin::success();
+ AsyncInfoWrapper.finalize(Err);
+ if (Err)
+ return Err;
+ return Res;
+}
+
Error GenericDeviceTy::syncEvent(void *EventPtr) {
return syncEventImpl(EventPtr);
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index c5f31670079ae..7649fd9285bb5 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::check(Res, "error in cuStreamWaitEvent: %s");
}
+ // TODO: This should be implementable on CUDA
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index d950572265b4c..9abc3507f6e68 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
return Plugin::success();
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
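To summarize the new plugin hook outside of diff context, here is a hedged sketch of the override an out-of-tree plugin would provide. `MyDeviceTy`, `MyStreamTy` and `isIdle()` are invented names; the signature follows the `hasPendingWorkImpl` declaration added to `PluginInterface.h` above, and the conservative `return true` fallback matches what the CUDA and host plugins do in this patch.

```cpp
// Sketch only: MyDeviceTy, MyStreamTy and isIdle() are hypothetical stand-ins.
Expected<bool> MyDeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) {
  auto Stream = AsyncInfo.getQueueAs<MyStreamTy *>();
  if (!Stream)
    return false; // No stream attached yet, so nothing can be pending.

  // Backends that cannot answer this cheaply should simply `return true;`,
  // which keeps the pre-patch behaviour of always recording a real event.
  return !Stream->isIdle();
}
```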
@llvm/pr-subscribers-offload

Author: Ross Brunton (RossBrunton)

Changes: Add a device function to check if a device queue is empty. If liboffload tries to create an event for an empty queue, we create an "empty" event that is already complete. This allows `olCreateEvent`, `olSyncEvent` and `olWaitEvent` to run quickly for empty queues.

Full diff: https://github.com/llvm/llvm-project/pull/152304.diff (6 files affected; identical to the diff shown above)
LGTM, although since this changes PluginInterface it might be good to wait for another approval
@@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
}

Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
  auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
Unrelated, but it would be nice to have something nicer than `Device->Device->`.
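For what it's worth, one possible shape for such a convenience helper, purely hypothetical and not part of this patch; it assumes `ol_queue_impl_t::Device->Device` is the underlying `GenericDeviceTy`, as the existing call sites imply.

```cpp
// Hypothetical addition to ol_queue_impl_t (not in this patch).
struct ol_queue_impl_t {
  // ... existing members (AsyncInfo, etc.) ...
  ol_device_handle_t Device;

  // Convenience accessor for the plugin device behind Device->Device.
  GenericDeviceTy &deviceImpl() { return *Device->Device; }
};

// Call site before: Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
// Call site after:  Queue->deviceImpl().hasPendingWork(Queue->AsyncInfo);
```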
if (!Stream)
  return false;

auto Query = Stream->query();
This has the side effect of performing cleanup required for the device (freeing any temporary memory, releasing signals, clearing the array, etc.) if the queue happens to be empty. I think this is fine and a sensible thing to do. It should have no observable side effects outside of the RTL.
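For readers skimming the thread, here is the hunk under discussion, annotated with that side effect; the annotations paraphrase the comment above, and `Stream->query()` is taken to return true once the stream has finished all of its work, as `!*Query` implies.

```cpp
Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
  auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
  if (!Stream)
    return false; // No stream has been attached, so nothing can be pending.

  // query() reports whether the stream has completed all of its work and, as
  // a side effect, may release resources of completed operations (temporary
  // memory, signals, the operation array). That cleanup is not observable
  // outside the RTL.
  auto Query = Stream->query();
  if (Query)
    return !*Query; // Stream done -> no pending work.
  return Query.takeError();
}
```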
Add a device function to check if a device queue is empty. If liboffload tries to create an event for an empty queue, we create an "empty" event that is already complete.

This allows `olCreateEvent`, `olSyncEvent` and `olWaitEvent` to run quickly for empty queues.