diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 3b4a91fc0a..02d0fe0352 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// #include "command_buffer.hpp" +#include "event.hpp" #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" @@ -268,7 +269,7 @@ ur_result_t createSyncPointAndGetZeEvents( UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, ZeEventList)); - ur_event_handle_t LaunchEvent; + ur_event_handle_legacy_t LaunchEvent; UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, HostVisible, &LaunchEvent, false, !CommandBuffer->IsProfilingEnabled)); LaunchEvent->CommandType = CommandType; @@ -414,9 +415,9 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( : Context(Context), Device(Device), ZeComputeCommandList(CommandList), ZeComputeCommandListTranslated(CommandListTranslated), ZeCommandListResetEvents(CommandListResetEvents), - ZeCopyCommandList(CopyCommandList), SignalEvent(SignalEvent), - WaitEvent(WaitEvent), AllResetEvent(AllResetEvent), ZeFencesMap(), - ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0), + ZeCopyCommandList(CopyCommandList), SignalEvent(Legacy(SignalEvent)), + WaitEvent(Legacy(WaitEvent)), AllResetEvent(Legacy(AllResetEvent)), + ZeFencesMap(), ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0), IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? 
Desc->enableProfiling : false), IsInOrderCmdList(IsInOrderCmdList) { @@ -523,7 +524,8 @@ ur_exp_command_buffer_command_handle_t_:: } void ur_exp_command_buffer_handle_t_::registerSyncPoint( - ur_exp_command_buffer_sync_point_t SyncPoint, ur_event_handle_t Event) { + ur_exp_command_buffer_sync_point_t SyncPoint, + ur_event_handle_legacy_t Event) { SyncPoints[SyncPoint] = Event; NextSyncPoint++; ZeEventsList.push_back(Event->ZeEvent); @@ -633,9 +635,9 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, UR_RESULT_ERROR_UNSUPPORTED_FEATURE); } - ur_event_handle_t SignalEvent; - ur_event_handle_t WaitEvent; - ur_event_handle_t AllResetEvent; + ur_event_handle_legacy_t SignalEvent; + ur_event_handle_legacy_t WaitEvent; + ur_event_handle_legacy_t AllResetEvent; UR_CALL(EventCreate(Context, nullptr, false, false, &SignalEvent, false, !EnableProfiling)); @@ -1248,7 +1250,7 @@ ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue, ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList) { + const ur_event_handle_legacy_t *EventWaitList) { const bool UseCopyEngine = false; bool MustSignalWaitEvent = true; if (NumEventsInWaitList) { @@ -1299,9 +1301,9 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t SignalCommandList, - ur_event_handle_t &Event) { + ur_event_handle_legacy_t &Event) { // Execution event for this enqueue of the UR command-buffer - ur_event_handle_t RetEvent{}; + ur_event_handle_legacy_t RetEvent{}; UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, @@ -1355,7 +1357,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( CommandBuffer->getFenceForQueue(ZeCommandQueue, ZeFence); 
UR_CALL(waitForDependencies(CommandBuffer, Queue, NumEventsInWaitList, - EventWaitList)); + Legacy(EventWaitList))); // Submit reset events command-list. This command-list is of a batch // command-list type, regardless of the UR Queue type. We therefore need to @@ -1385,9 +1387,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal the Event on completion ur_command_list_ptr_t SignalCommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, - false, NumEventsInWaitList, - EventWaitList, false)); + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, SignalCommandList, false, NumEventsInWaitList, + Legacy(EventWaitList), false)); // Reset the wait-event for the UR command-buffer that is signaled when its // submission dependencies have been satisfied. @@ -1399,7 +1401,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( (SignalCommandList->first, CommandBuffer->AllResetEvent->ZeEvent)); if (Event) { - UR_CALL(createUserEvent(CommandBuffer, Queue, SignalCommandList, *Event)); + UR_CALL(createUserEvent(CommandBuffer, Queue, SignalCommandList, + *Legacy(Event))); } UR_CALL(Queue->executeCommandList(SignalCommandList, false, false)); diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index a514501a5e..ec64da8074 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -35,7 +35,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, - ur_event_handle_t Event); + ur_event_handle_legacy_t Event); ur_exp_command_buffer_sync_point_t getNextSyncPoint() const { return NextSyncPoint; @@ -82,13 +82,13 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { ze_command_list_handle_t ZeCopyCommandList; // Event 
which will signals the most recent execution of the command-buffer // has finished - ur_event_handle_t SignalEvent = nullptr; + ur_event_handle_legacy_t SignalEvent = nullptr; // Event which a command-buffer waits on until the wait-list dependencies // passed to a command-buffer enqueue have been satisfied. - ur_event_handle_t WaitEvent = nullptr; + ur_event_handle_legacy_t WaitEvent = nullptr; // Event which a command-buffer waits on until the main command-list event // have been reset. - ur_event_handle_t AllResetEvent = nullptr; + ur_event_handle_legacy_t AllResetEvent = nullptr; // This flag is must be set to false if at least one copy command has been // added to `ZeCopyCommandList` bool MCopyCommandListEmpty = true; @@ -100,7 +100,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { // itself. ze_fence_handle_t ZeActiveFence; // Map of sync_points to ur_events - std::unordered_map + std::unordered_map SyncPoints; // Next sync_point value (may need to consider ways to reuse values if 32-bits // is not enough) diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index a81b852727..b2c8b6976f 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -512,4 +512,21 @@ extern thread_local int32_t ErrorAdapterNativeCode; ur_result_t ErrorCode, int32_t AdapterErrorCode); +// Get specific implementation of UR type from the handle +template UrType GetImpl(UrHandle Handle) { + if (!Handle) + return nullptr; + auto *H = dynamic_cast(Handle); + if (!H) { + if constexpr (std::is_same_v) { + throw UR_RESULT_ERROR_INVALID_QUEUE; + } else if constexpr (std::is_same_v) { + throw UR_RESULT_ERROR_INVALID_EVENT; + } else { + throw UR_RESULT_ERROR_UNKNOWN; + } + } + return H; +} + #define L0_DRIVER_INORDER_MIN_VERSION 29534 diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 22adfa96ed..fdf3f0f05e 100644 --- 
a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -555,7 +555,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( return UR_RESULT_SUCCESS; } -ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( +ur_event_handle_legacy_t ur_context_handle_t_::getEventFromContextCache( bool HostVisible, bool WithProfiling, ur_device_handle_t Device, bool CounterBasedEventEnabled) { std::scoped_lock Lock(EventCacheMutex); @@ -564,7 +564,7 @@ ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( return nullptr; auto It = Cache->begin(); - ur_event_handle_t Event = *It; + ur_event_handle_legacy_t Event = *It; if (Event->CounterBasedEventsEnabled != CounterBasedEventEnabled) { return nullptr; } @@ -574,7 +574,8 @@ ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( return Event; } -void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { +void ur_context_handle_t_::addEventToContextCache( + ur_event_handle_legacy_t Event) { std::scoped_lock Lock(EventCacheMutex); ur_device_handle_t Device = nullptr; @@ -587,8 +588,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { Cache->emplace_back(Event); } -ur_result_t -ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { +ur_result_t ur_context_handle_t_::decrementUnreleasedEventsInPool( + ur_event_handle_legacy_t Event) { std::shared_lock EventLock(Event->Mutex, std::defer_lock); std::scoped_lock> LockAll( ZeEventPoolCacheMutex, EventLock); @@ -648,14 +649,14 @@ static const size_t CmdListsCleanupThreshold = [] { ur_result_t ur_context_handle_t_::getAvailableCommandList( ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, bool AllowBatching, + const ur_event_handle_legacy_t *EventWaitList, bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) { // Immediate 
commandlists have been pre-allocated and are always available. if (Queue->UsingImmCmdLists) { CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList(); if (CommandList->second.EventList.size() >= Queue->getImmdCmmdListsEventCleanupThreshold()) { - std::vector EventListToCleanup; + std::vector EventListToCleanup; Queue->resetCommandList(CommandList, false, EventListToCleanup); CleanupEventListFromResetCmdList(EventListToCleanup, true); } @@ -804,7 +805,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence)); if (ZeResult == ZE_RESULT_SUCCESS) { - std::vector EventListToCleanup; + std::vector EventListToCleanup; Queue->resetCommandList(it, false, EventListToCleanup); CleanupEventListFromResetCmdList(EventListToCleanup, true /* QueueLocked */); diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index dc70a2470c..10c5db7fa1 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -170,7 +170,7 @@ struct ur_context_handle_t_ : _ur_object { ur_mutex EventCacheMutex; // Caches for events. - using EventCache = std::vector>; + using EventCache = std::vector>; EventCache EventCaches{4}; std::vector> EventCachesDeviceMap{4}; @@ -204,14 +204,14 @@ struct ur_context_handle_t_ : _ur_object { bool CounterBasedEventEnabled, bool UsingImmCmdList); - // Get ur_event_handle_t from cache. - ur_event_handle_t getEventFromContextCache(bool HostVisible, - bool WithProfiling, - ur_device_handle_t Device, - bool CounterBasedEventEnabled); + // Get ur_event_handle_legacy_t from cache. + ur_event_handle_legacy_t + getEventFromContextCache(bool HostVisible, bool WithProfiling, + ur_device_handle_t Device, + bool CounterBasedEventEnabled); - // Add ur_event_handle_t to cache. - void addEventToContextCache(ur_event_handle_t); + // Add ur_event_handle_legacy_t to cache. 
+ void addEventToContextCache(ur_event_handle_legacy_t); enum EventPoolCacheType { HostVisibleCacheType, @@ -271,7 +271,7 @@ struct ur_context_handle_t_ : _ur_object { // Decrement number of events living in the pool upon event destroy // and return the pool to the cache if there are no unreleased events. - ur_result_t decrementUnreleasedEventsInPool(ur_event_handle_t Event); + ur_result_t decrementUnreleasedEventsInPool(ur_event_handle_legacy_t Event); // Retrieves a command list for executing on this device along with // a fence to be used in tracking the execution of this command list. @@ -296,7 +296,7 @@ struct ur_context_handle_t_ : _ur_object { ur_result_t getAvailableCommandList( ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, bool AllowBatching = false, + const ur_event_handle_legacy_t *EventWaitList, bool AllowBatching = false, ze_command_queue_handle_t *ForcedCmdQueue = nullptr); // Checks if Device is covered by this context. 
diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 33495f52b8..068ec855f0 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -47,7 +47,7 @@ static const bool UseMultipleCmdlistBarriers = [] { bool WaitListEmptyOrAllEventsFromSameQueue( ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList) { + const ur_event_handle_legacy_t *EventWaitList) { if (!NumEventsInWaitList) return true; @@ -81,17 +81,19 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of _ur_ze_event_list_t TmpWaitList = {}; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); + NumEventsInWaitList, Legacy(EventWaitList), Queue, UseCopyEngine)); // Get a new command list to be used on this call ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, + Legacy(EventWaitList))); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = + OutEvent ? 
Legacy(OutEvent) : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_EVENTS_WAIT, CommandList, IsInternal, false)); @@ -120,7 +122,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of std::scoped_lock lock(Queue->Mutex); if (OutEvent) { - UR_CALL(createEventAndAssociateQueue(Queue, OutEvent, + UR_CALL(createEventAndAssociateQueue(Queue, Legacy(OutEvent), UR_COMMAND_EVENTS_WAIT, Queue->CommandListMap.end(), false, /* IsInternal */ false)); @@ -129,11 +131,12 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of UR_CALL(Queue->synchronize()); if (OutEvent) { - Queue->LastCommandEvent = reinterpret_cast(*OutEvent); + Queue->LastCommandEvent = + reinterpret_cast(*OutEvent); - if (!(*OutEvent)->CounterBasedEventsEnabled) - ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent)); - (*OutEvent)->Completed = true; + if (!(*Legacy(OutEvent))->CounterBasedEventsEnabled) + ZE2UR_CALL(zeEventHostSignal, ((*Legacy(OutEvent))->ZeEvent)); + (*Legacy(OutEvent))->Completed = true; } } @@ -175,7 +178,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList, const _ur_ze_event_list_t &EventWaitList, - ur_event_handle_t &Event, bool IsInternal) { + ur_event_handle_legacy_t &Event, bool IsInternal) { UR_CALL(createEventAndAssociateQueue( Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, IsInternal, false)); @@ -222,15 +225,16 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the return UR_RESULT_SUCCESS; } - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = + OutEvent ? 
Legacy(OutEvent) : &InternalEvent; // For in-order queue and wait-list which is empty or has events from // the same queue just use the last command event as the barrier event. if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, - EventWaitList) && + Legacy(EventWaitList)) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { UR_CALL(urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; @@ -254,13 +258,14 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // Retain the events as they will be owned by the result event. _ur_ze_event_list_t TmpWaitList; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, EventWaitList, Queue, false /*UseCopyEngine=*/)); + NumEventsInWaitList, Legacy(EventWaitList), Queue, + false /*UseCopyEngine=*/)); // Get an arbitrary command-list in the queue. ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + Legacy(EventWaitList), OkToBatch)); // Insert the barrier into the command-list and execute. UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal)); @@ -270,7 +275,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // Because of the dependency between commands in the in-order queue we don't // need to keep track of any active barriers if we have in-order queue. 
if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) { - auto UREvent = reinterpret_cast(*Event); + auto UREvent = reinterpret_cast(*Event); Queue->ActiveBarriers.add(UREvent); } return UR_RESULT_SUCCESS; @@ -316,7 +321,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, UseCopyEngine, NumEventsInWaitList, - EventWaitList, OkToBatch, &ZeQueue)); + Legacy(EventWaitList), OkToBatch, &ZeQueue)); CmdLists.push_back(CmdList); } } @@ -330,14 +335,14 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + Legacy(EventWaitList), OkToBatch)); CmdLists.push_back(CmdList); } if (CmdLists.size() > 1) { // Insert a barrier into each unique command queue using the available // command-lists. 
- std::vector EventWaitVector(CmdLists.size()); + std::vector EventWaitVector(CmdLists.size()); for (size_t I = 0; I < CmdLists.size(); ++I) { UR_CALL(insertBarrierIntoCmdList(CmdLists[I], _ur_ze_event_list_t{}, EventWaitVector[I], @@ -354,7 +359,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the _ur_ze_event_list_t BaseWaitList; UR_CALL(BaseWaitList.createAndRetainUrZeEventList( EventWaitVector.size(), - reinterpret_cast(EventWaitVector.data()), + reinterpret_cast( + EventWaitVector.data()), Queue, ConvergenceCmdList->second.isCopy(Queue))); // Insert a barrier with the events from each command-queue into the @@ -383,14 +389,14 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the } UR_CALL(Queue->ActiveBarriers.clear()); - auto UREvent = reinterpret_cast(*Event); + auto UREvent = reinterpret_cast(*Event); Queue->ActiveBarriers.add(UREvent); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( - ur_event_handle_t Event, ///< [in] handle of the event object - ur_event_info_t PropName, ///< [in] the name of the event property to query + ur_event_handle_t UrEvent, ///< [in] handle of the event object + ur_event_info_t PropName, ///< [in] the name of the event property to query size_t PropValueSize, ///< [in] size in bytes of the event property value void *PropValue, ///< [out][optional] value of the event property size_t @@ -398,6 +404,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( ) { UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + auto Event = Legacy(UrEvent); + switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: { std::shared_lock EventLock(Event->Mutex); @@ -472,7 +480,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( - ur_event_handle_t Event, ///< [in] handle of the event object + ur_event_handle_t UrEvent, ///< [in] handle of the event object ur_profiling_info_t 
PropName, ///< [in] the name of the profiling property to query size_t @@ -481,6 +489,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( size_t *PropValueSizeRet ///< [out][optional] pointer to the actual size in ///< bytes returned in propValue ) { + auto Event = Legacy(UrEvent); + std::shared_lock EventLock(Event->Mutex); // The event must either have profiling enabled or be recording timestamps. @@ -665,17 +675,19 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out] return an event object that identifies + ///< this particular command instance. ) { auto Queue = this; + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -718,7 +730,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( +ur_result_t ur_event_handle_legacy_t_::getOrCreateHostVisibleEvent( ze_event_handle_t &ZeHostVisibleEvent) { auto UrQueue = Legacy(this->UrQueue); @@ -770,9 +782,10 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( UR_APIEXPORT ur_result_t UR_APICALL urEventWait( uint32_t NumEvents, ///< [in] number of events in the event list const ur_event_handle_t - *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of - ///< events to wait for completion + *UrEventWaitList ///< [in][range(0, numEvents)] pointer to a list of + ///< events to wait for completion ) { + auto EventWaitList = Legacy(UrEventWaitList); for (uint32_t I = 0; I < NumEvents; I++) { auto e = EventWaitList[I]; auto UrQueue = Legacy(e->UrQueue); @@ -781,7 +794,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( // This ensures that all signalling commands are submitted below and // thus proxy events can be waited without a deadlock. // - ur_event_handle_t_ *Event = ur_cast(e); + ur_event_handle_legacy_t_ *Event = + ur_cast(e); if (!Event->hasExternalRefs()) die("urEventsWait must not be called for an internal event"); @@ -792,7 +806,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( } // Submit dependent open command lists for execution, if any for (uint32_t I = 0; I < NumEvents; I++) { - ur_event_handle_t_ *Event = ur_cast(EventWaitList[I]); + ur_event_handle_legacy_t_ *Event = + ur_cast(EventWaitList[I]); auto UrQueue = Legacy(Event->UrQueue); if (UrQueue) { // Lock automatically releases when this goes out of scope. 
@@ -804,8 +819,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( std::unordered_set Queues; for (uint32_t I = 0; I < NumEvents; I++) { { - ur_event_handle_t_ *Event = - ur_cast(EventWaitList[I]); + ur_event_handle_legacy_t_ *Event = + ur_cast(EventWaitList[I]); { std::shared_lock EventLock(Event->Mutex); if (!Event->hasExternalRefs()) @@ -835,11 +850,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( CleanupEventsInImmCmdLists( Legacy(Event->UrQueue), false /* QueueLocked */, false /* QueueSynced */, - reinterpret_cast(Event)); + reinterpret_cast(Event)); else { // NOTE: we are cleaning up after the event here to free resources // sooner in case run-time is not calling urEventRelease soon enough. - CleanupCompletedEvent(reinterpret_cast(Event)); + CleanupCompletedEvent( + reinterpret_cast(Event)); // For the case when we have out-of-order queue or regular command // lists its more efficient to check fences so put the queue in the // set to cleanup later. @@ -859,29 +875,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( return UR_RESULT_SUCCESS; } +ur_result_t ur_event_handle_legacy_t_::retain() { + RefCountExternal++; + RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +ur_result_t ur_event_handle_legacy_t_::release() { + RefCountExternal--; + UR_CALL(urEventReleaseInternal(this)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( ur_event_handle_t Event ///< [in] handle of the event object ) { - Event->RefCountExternal++; - Event->RefCount.increment(); - - return UR_RESULT_SUCCESS; + return Event->retain(); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( ur_event_handle_t Event ///< [in] handle of the event object ) { - Event->RefCountExternal--; - UR_CALL(urEventReleaseInternal(Event)); - - return UR_RESULT_SUCCESS; + return Event->release(); } UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t Event, ///< [in] handle of the event. 
+ ur_event_handle_t UrEvent, ///< [in] handle of the event. ur_native_handle_t *NativeEvent ///< [out] a pointer to the native handle of the event. ) { + auto Event = Legacy(UrEvent); { std::shared_lock Lock(Event->Mutex); auto *ZeEvent = ur_cast(NativeEvent); @@ -905,8 +929,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_event_handle_t - *Event ///< [out] pointer to the handle of the event object created. + *UrEvent ///< [out] pointer to the handle of the event object created. ) { + auto Event = Legacy(UrEvent); UR_CALL(EventCreate(Context, nullptr, false, true, Event)); (*Event)->RefCountExternal++; @@ -920,9 +945,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_context_handle_t Context, ///< [in] handle of the context object const ur_event_native_properties_t *Properties, ur_event_handle_t - *Event ///< [out] pointer to the handle of the event object created. + *UrEvent ///< [out] pointer to the handle of the event object created. 
) { - + auto Event = Legacy(UrEvent); // we dont have urEventCreate, so use this check for now to know that // the call comes from urEventCreate() if (reinterpret_cast(NativeEvent) == nullptr) { @@ -935,11 +960,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( } auto ZeEvent = ur_cast(NativeEvent); - ur_event_handle_t_ *UREvent{}; + ur_event_handle_legacy_t_ *UREvent{}; try { - UREvent = new ur_event_handle_t_(ZeEvent, nullptr /* ZeEventPool */, - Context, UR_EXT_COMMAND_TYPE_USER, - Properties->isNativeHandleOwned); + UREvent = new ur_event_handle_legacy_t_(ZeEvent, nullptr /* ZeEventPool */, + Context, UR_EXT_COMMAND_TYPE_USER, + Properties->isNativeHandleOwned); UREvent->RefCountExternal++; @@ -951,7 +976,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( // Assume native event is host-visible, or otherwise we'd // need to create a host-visible proxy for it. - UREvent->HostVisibleEvent = reinterpret_cast(UREvent); + UREvent->HostVisibleEvent = + reinterpret_cast(UREvent); // Unlike regular events managed by SYCL RT we don't have to wait for interop // events completion, and not need to do the their `cleanup()`. This in @@ -960,7 +986,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( // made for waiting for event completion, but not this interop event. 
UREvent->CleanedUp = true; - *Event = reinterpret_cast(UREvent); + *Event = reinterpret_cast(UREvent); return UR_RESULT_SUCCESS; } @@ -981,7 +1007,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { +ur_result_t urEventReleaseInternal(ur_event_handle_legacy_t Event) { if (!Event->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1050,8 +1076,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { // We intentionally incremented the reference counter when an event is // created so that we can avoid ur_queue_handle_t is released before the - // associated ur_event_handle_t is released. Here we have to decrement it so - // ur_queue_handle_t can be released successfully. + // associated ur_event_handle_legacy_t is released. Here we have to decrement + // it so ur_queue_handle_t can be released successfully. if (Queue) { UR_CALL(urQueueReleaseInternal(Queue)); } @@ -1092,11 +1118,11 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle) { // the event, updates the last command event in the queue and cleans up all dep // events of the event. // If the caller locks queue mutex then it must pass 'true' to QueueLocked. -ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, - bool SetEventCompleted) { +ur_result_t CleanupCompletedEvent(ur_event_handle_legacy_t Event, + bool QueueLocked, bool SetEventCompleted) { ur_kernel_handle_t AssociatedKernel = nullptr; // List of dependent events. - std::list EventsToBeReleased; + std::list EventsToBeReleased; ur_queue_handle_legacy_t AssociatedQueue = nullptr; { // If the Event is already locked, then continue with the cleanup, otherwise @@ -1190,7 +1216,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // recursion. That turned out to be a significant problem with the recursive // code that preceded this implementation. 
while (!EventsToBeReleased.empty()) { - ur_event_handle_t DepEvent = EventsToBeReleased.front(); + ur_event_handle_legacy_t DepEvent = EventsToBeReleased.front(); DepEvent->Completed = true; EventsToBeReleased.pop_front(); @@ -1230,7 +1256,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, + bool HostVisible, ur_event_handle_legacy_t *RetEvent, bool CounterBasedEventEnabled, bool ForceDisableProfiling) { bool ProfilingEnabled = @@ -1280,7 +1306,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ZE2UR_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeEvent)); try { - *RetEvent = new ur_event_handle_t_( + *RetEvent = new ur_event_handle_legacy_t_( ZeEvent, ZeEventPool, reinterpret_cast(Context), UR_EXT_COMMAND_TYPE_USER, true); } catch (const std::bad_alloc &) { @@ -1291,12 +1317,12 @@ ur_result_t EventCreate(ur_context_handle_t Context, (*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled; if (HostVisible) (*RetEvent)->HostVisibleEvent = - reinterpret_cast(*RetEvent); + reinterpret_cast(*RetEvent); return UR_RESULT_SUCCESS; } -ur_result_t ur_event_handle_t_::reset() { +ur_result_t ur_event_handle_legacy_t_::reset() { UrQueue = nullptr; CleanedUp = false; Completed = false; @@ -1316,7 +1342,7 @@ ur_result_t ur_event_handle_t_::reset() { } ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( - uint32_t EventListLength, const ur_event_handle_t *EventList, + uint32_t EventListLength, const ur_event_handle_legacy_t *EventList, ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) { this->Length = 0; this->ZeEventList = nullptr; @@ -1393,7 +1419,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( if (IncludeLastCommandEvent) { this->ZeEventList = new ze_event_handle_t[EventListLength + 1]; - this->UrEventList = new 
ur_event_handle_t[EventListLength + 1]; + this->UrEventList = new ur_event_handle_legacy_t[EventListLength + 1]; std::shared_lock Lock(CurQueue->LastCommandEvent->Mutex); this->ZeEventList[0] = CurQueue->LastCommandEvent->ZeEvent; this->UrEventList[0] = CurQueue->LastCommandEvent; @@ -1401,7 +1427,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( TmpListLength = 1; } else if (EventListLength > 0) { this->ZeEventList = new ze_event_handle_t[EventListLength]; - this->UrEventList = new ur_event_handle_t[EventListLength]; + this->UrEventList = new ur_event_handle_legacy_t[EventListLength]; } // For in-order queue and wait-list which is empty or has events only from @@ -1522,7 +1548,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( if (Queue && QueueRootDevice != CurrentQueueRootDevice && !EventList[I]->IsMultiDevice) { ze_event_handle_t MultiDeviceZeEvent = nullptr; - ur_event_handle_t MultiDeviceEvent; + ur_event_handle_legacy_t MultiDeviceEvent; bool IsInternal = true; bool IsMultiDevice = true; @@ -1576,12 +1602,12 @@ ur_result_t _ur_ze_event_list_t::insert(_ur_ze_event_list_t &Other) { // save of the previous object values uint32_t PreLength = this->Length; ze_event_handle_t *PreZeEventList = this->ZeEventList; - ur_event_handle_t *PreUrEventList = this->UrEventList; + ur_event_handle_legacy_t *PreUrEventList = this->UrEventList; // allocate new memory uint32_t Length = PreLength + Other.Length; this->ZeEventList = new ze_event_handle_t[Length]; - this->UrEventList = new ur_event_handle_t[Length]; + this->UrEventList = new ur_event_handle_legacy_t[Length]; // copy elements uint32_t TmpListLength = 0; @@ -1610,7 +1636,7 @@ ur_result_t _ur_ze_event_list_t::insert(_ur_ze_event_list_t &Other) { } ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList( - std::list &EventsToBeReleased) { + std::list &EventsToBeReleased) { // event wait lists are owned by events, this function is called with owning // event lock 
taken, hence it is thread safe for (uint32_t I = 0; I < Length; I++) { @@ -1632,14 +1658,14 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList( } // Tells if this event is with profiling capabilities. -bool ur_event_handle_t_::isProfilingEnabled() const { +bool ur_event_handle_legacy_t_::isProfilingEnabled() const { return !UrQueue || // tentatively assume user events are profiling enabled (Legacy(UrQueue)->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; } // Tells if this event was created as a timestamp event, allowing profiling // info even if profiling is not enabled. -bool ur_event_handle_t_::isTimestamped() const { +bool ur_event_handle_legacy_t_::isTimestamped() const { // If we are recording, the start time of the event will be non-zero. The // end time might still be missing, depending on whether the corresponding // enqueue is still running. diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index e99df2a272..9ade53b1d9 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -27,11 +27,14 @@ #include "common.hpp" #include "queue.hpp" +struct ur_event_handle_legacy_t_; +using ur_event_handle_legacy_t = ur_event_handle_legacy_t_ *; + extern "C" { -ur_result_t urEventReleaseInternal(ur_event_handle_t Event); +ur_result_t urEventReleaseInternal(ur_event_handle_legacy_t Event); ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, + bool HostVisible, ur_event_handle_legacy_t *RetEvent, bool CounterBasedEventEnabled = false, bool ForceDisableProfiling = false); } // extern "C" @@ -71,7 +74,7 @@ struct _ur_ze_event_list_t { ze_event_handle_t *ZeEventList = {nullptr}; // List of ur_events for this event list. - ur_event_handle_t *UrEventList = {nullptr}; + ur_event_handle_legacy_t *UrEventList = {nullptr}; // length of both the lists. 
The actual allocation of these lists // may be longer than this length. This length is the actual number @@ -79,7 +82,7 @@ struct _ur_ze_event_list_t { uint32_t Length = {0}; // Initialize this using the array of events in EventList, and retain - // all the ur_event_handle_t in the created data structure. + // all the ur_event_handle_legacy_t in the created data structure. // CurQueue is the ur_queue_handle_t that the command with this event wait // list is going to be added to. That is needed to flush command // batches for wait events that are in other queues. @@ -87,16 +90,15 @@ struct _ur_ze_event_list_t { // event wait-list is for) is going to go to copy or compute // queue. This is used to properly submit the dependent open // command-lists. - ur_result_t createAndRetainUrZeEventList(uint32_t EventListLength, - const ur_event_handle_t *EventList, - ur_queue_handle_legacy_t CurQueue, - bool UseCopyEngine); + ur_result_t createAndRetainUrZeEventList( + uint32_t EventListLength, const ur_event_handle_legacy_t *EventList, + ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine); // Add all the events in this object's UrEventList to the end // of the list EventsToBeReleased. Destroy ur_ze_event_list_t data // structure fields making it look empty. ur_result_t collectEventsForReleaseAndDestroyUrZeEventList( - std::list &EventsToBeReleased); + std::list &EventsToBeReleased); // Had to create custom assignment operator because the mutex is // not assignment copyable. 
Just field by field copy of the other @@ -122,16 +124,25 @@ struct _ur_ze_event_list_t { void printZeEventList(const _ur_ze_event_list_t &PiZeEventList); -struct ur_event_handle_t_ : _ur_object { - ur_event_handle_t_(ze_event_handle_t ZeEvent, - ze_event_pool_handle_t ZeEventPool, - ur_context_handle_t Context, ur_command_t CommandType, - bool OwnZeEvent) +struct ur_event_handle_t_ { + virtual ~ur_event_handle_t_() {} + virtual ur_result_t retain() = 0; + virtual ur_result_t release() = 0; +}; + +struct ur_event_handle_legacy_t_ : _ur_object, public ur_event_handle_t_ { + ur_event_handle_legacy_t_(ze_event_handle_t ZeEvent, + ze_event_pool_handle_t ZeEventPool, + ur_context_handle_t Context, + ur_command_t CommandType, bool OwnZeEvent) : ZeEvent{ZeEvent}, ZeEventPool{ZeEventPool}, Context{Context}, CommandType{CommandType}, CommandData{nullptr} { OwnNativeHandle = OwnZeEvent; } + ur_result_t retain() override; + ur_result_t release() override; + // Level Zero event handle. ze_event_handle_t ZeEvent; @@ -148,11 +159,11 @@ struct ur_event_handle_t_ : _ur_object { // The HostVisibleEvent is a reference counted PI event and can be used more // than by just this one event, depending on the mode (see EventsScope). // - ur_event_handle_t HostVisibleEvent = {nullptr}; + ur_event_handle_legacy_t HostVisibleEvent = {nullptr}; bool isHostVisible() const { return this == - const_cast( - reinterpret_cast(HostVisibleEvent)); + const_cast( + reinterpret_cast(HostVisibleEvent)); } // Provide direct access to Context, instead of going via queue. @@ -168,7 +179,7 @@ struct ur_event_handle_t_ : _ur_object { // Opaque data to hold any data needed for CommandType. 
void *CommandData; - // Command list associated with the ur_event_handle_t + // Command list associated with the ur_event_handle_legacy_t std::optional CommandList; // List of events that were in the wait list of the command that will @@ -233,7 +244,7 @@ struct ur_event_handle_t_ : _ur_object { bool hasExternalRefs() { return RefCountExternal != 0; } - // Reset ur_event_handle_t object. + // Reset ur_event_handle_legacy_t object. ur_result_t reset(); // Tells if this event is with profiling capabilities. @@ -253,6 +264,23 @@ struct ur_event_handle_t_ : _ur_object { bool CounterBasedEventsEnabled = false; }; +// Get legacy implementation for event +static inline ur_event_handle_legacy_t Legacy(ur_event_handle_t Event) { + return GetImpl(Event); +} +// Get legacy implementation for event wait list +static inline const ur_event_handle_legacy_t * +Legacy(const ur_event_handle_t *Events) { + if (Events) { + // check if cast succeeds + GetImpl(*Events); + } + return reinterpret_cast(Events); +} +static inline ur_event_handle_legacy_t *Legacy(ur_event_handle_t *Event) { + return reinterpret_cast(Event); +} + // Helper function to implement zeHostSynchronize. // The behavior is to avoid infinite wait during host sync under ZE_DEBUG. // This allows for a much more responsive debugging of hangs. @@ -273,7 +301,7 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle); // the event, updates the last command event in the queue and cleans up all dep // events of the event. // If the caller locks queue mutex then it must pass 'true' to QueueLocked. 
-ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, +ur_result_t CleanupCompletedEvent(ur_event_handle_legacy_t Event, bool QueueLocked = false, bool SetEventCompleted = false); diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 1537a1d201..9a16682b1e 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -768,8 +768,10 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( [[maybe_unused]] ur_exp_image_copy_region_t *pCopyRegion, [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { + [[maybe_unused]] const ur_event_handle_t *phUrEventWaitList, + [[maybe_unused]] ur_event_handle_t *phUrEvent) { + auto phEventWaitList = Legacy(phUrEventWaitList); + auto phEvent = Legacy(phUrEvent); auto hQueue = this; std::scoped_lock Lock(hQueue->Mutex); @@ -804,9 +806,9 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( OkToBatch)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = phEvent == nullptr; - ur_event_handle_t *Event = phEvent ? phEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = phEvent ? phEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(hQueue, Event, UR_COMMAND_MEM_IMAGE_COPY, CommandList, IsInternal, /*IsMultiDevice*/ false)); diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index cb020395ed..f458ce36b2 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -121,18 +121,20 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( ///< implementation will choose the work-group size. 
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular kernel execution instance. ) { UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Queue, Kernel, &ZeKernel)); @@ -245,9 +247,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( true /* AllowBatching */)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent{}; + ur_event_handle_legacy_t InternalEvent{}; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH, CommandList, IsInternal, false)); @@ -330,18 +332,20 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( ///< implementation will choose the work-group size. 
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular kernel execution instance. ) { UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; auto ZeDevice = Queue->Device->ZeDevice; @@ -509,9 +513,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( true /* AllowBatching */)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent{}; + ur_event_handle_legacy_t InternalEvent{}; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH, CommandList, IsInternal, false)); @@ -586,15 +590,17 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( const void *Src, ///< [in] pointer to where the data must be copied from. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. 
const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *Event ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *UrEvent ///< [in,out][optional] return an event object that identifies + ///< this particular kernel execution instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto Event = Legacy(UrEvent); auto Queue = this; std::scoped_lock lock(Queue->Mutex); @@ -635,15 +641,17 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( void *Dst, ///< [in] pointer to where the data must be copied to. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be - ///< complete before the kernel execution. If - ///< nullptr, the numEventsInWaitList must be 0, - ///< indicating that no wait event. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be + ///< complete before the kernel execution. If + ///< nullptr, the numEventsInWaitList must be 0, + ///< indicating that no wait event. ur_event_handle_t - *Event ///< [in,out][optional] return an event object that - ///< identifies this particular kernel execution instance. + *UrEvent ///< [in,out][optional] return an event object that + ///< identifies this particular kernel execution instance. 
) { + auto EventWaitList = Legacy(UrEventWaitList); + auto Event = Legacy(UrEvent); auto Queue = this; std::scoped_lock lock(Queue->Mutex); diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 95650a7b94..3a220e7f13 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -49,8 +49,8 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, ur_queue_handle_legacy_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent, + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent, bool PreferCopyEngine) { bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine); @@ -68,9 +68,9 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, OkToBatch)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? 
OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList, IsInternal, false)); UR_CALL(setSignalEvent(Queue, UseCopyEngine, &ZeEvent, Event, @@ -104,8 +104,8 @@ ur_result_t enqueueMemCopyRectHelper( ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine) { + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent, bool PreferCopyEngine) { bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine); _ur_ze_event_list_t TmpWaitList; @@ -122,9 +122,9 @@ ur_result_t enqueueMemCopyRectHelper( OkToBatch)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList, IsInternal, false)); UR_CALL(setSignalEvent(Queue, UseCopyEngine, &ZeEvent, Event, @@ -184,13 +184,12 @@ ur_result_t enqueueMemCopyRectHelper( } // PI interfaces must have queue's and buffer's mutexes locked on entry. 
-static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { +static ur_result_t +enqueueMemFillHelper(ur_command_t CommandType, ur_queue_handle_legacy_t Queue, + void *Ptr, const void *Pattern, size_t PatternSize, + size_t Size, uint32_t NumEventsInWaitList, + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent) { auto &Device = Queue->Device; // Make sure that pattern size matches the capability of the copy queues. @@ -231,9 +230,9 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, OkToBatch)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? 
OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList, IsInternal, false)); UR_CALL(setSignalEvent(Queue, UseCopyEngine, &ZeEvent, Event, @@ -330,8 +329,8 @@ static ur_result_t enqueueMemImageCommandHelper( ur_bool_t IsBlocking, ur_rect_offset_t *SrcOrigin, ur_rect_offset_t *DstOrigin, ur_rect_region_t *Region, size_t RowPitch, size_t SlicePitch, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine = false) { + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent, bool PreferCopyEngine = false) { bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine); _ur_ze_event_list_t TmpWaitList; @@ -348,9 +347,9 @@ static ur_result_t enqueueMemImageCommandHelper( OkToBatch)); ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList, IsInternal, false)); UR_CALL(setSignalEvent(Queue, UseCopyEngine, &ZeEvent, Event, @@ -469,16 +468,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( void *pDst, ///< [in] pointer to host memory where data is to be read into uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. 
+ *phUrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be + ///< complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, + ///< indicating that this command does not wait on + ///< any event to complete. ur_event_handle_t - *phEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *phUrEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto phEventWaitList = Legacy(phUrEventWaitList); + auto phEvent = Legacy(phUrEvent); auto Queue = this; ur_mem_handle_t_ *Src = ur_cast(hBuffer); @@ -505,16 +506,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( *pSrc, ///< [in] pointer to host memory where data is to be written from uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *phUrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be + ///< complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, + ///< indicating that this command does not wait on + ///< any event to complete. ur_event_handle_t - *phEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *phUrEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. 
) { + auto phEventWaitList = Legacy(phUrEventWaitList); + auto phEvent = Legacy(phUrEvent); auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); @@ -550,16 +553,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( void *pDst, ///< [in] pointer to host memory where data is to be read into uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *phUrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be + ///< complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, + ///< indicating that this command does not wait on + ///< any event to complete. ur_event_handle_t - *phEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *phUrEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto phEventWaitList = Legacy(phUrEventWaitList); + auto phEvent = Legacy(phUrEvent); auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); @@ -597,16 +602,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( *pSrc, ///< [in] pointer to host memory where data is to be written from uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< points to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. 
+ *phUrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< points to a list of events that must be + ///< complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, + ///< indicating that this command does not wait on + ///< any event to complete. ur_event_handle_t - *phEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *phUrEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto phEventWaitList = Legacy(phUrEventWaitList); + auto phEvent = Legacy(phUrEvent); auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); @@ -632,16 +639,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( size_t Size, ///< [in] size in bytes of data being copied uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. 
) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -694,16 +703,18 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the ///< destination buffer object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -743,16 +754,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. 
If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); @@ -780,16 +793,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( void *Dst, ///< [in] pointer to host memory where image is to be read into uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. 
) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); @@ -812,16 +827,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( void *Src, ///< [in] pointer to host memory where image is to be read into uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); @@ -844,16 +861,18 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of ///< pixels of the 1D, 2D, or 3D image uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. 
+ *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); std::scoped_lock, ur_shared_mutex, @@ -880,26 +899,28 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( size_t Size, ///< [in] size in bytes of the buffer region being mapped uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent, ///< [in,out][optional] return an event object that - ///< identifies this particular command instance. - void **RetMap ///< [in,out] return mapped pointer. TODO: move it before - ///< numEventsInWaitList? + *UrOutEvent, ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. 
+ void **RetMap ///< [in,out] return mapped pointer. TODO: move it before + ///< numEventsInWaitList? ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; auto Buffer = ur_cast<_ur_buffer *>(Buf); UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; ze_event_handle_t ZeEvent = nullptr; bool UseCopyEngine = false; @@ -951,7 +972,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(urEventWait(NumEventsInWaitList, UrEventWaitList)); if (Queue->isInOrderQueue()) UR_CALL(urQueueFinish(Queue)); @@ -1010,7 +1031,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); // Add the event to the command list. - CommandList->second.append(reinterpret_cast(*Event)); + CommandList->second.append( + reinterpret_cast(*Event)); (*Event)->RefCount.increment(); const auto &ZeCommandList = CommandList->first; @@ -1045,16 +1067,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( void *MappedPtr, ///< [in] mapped host address uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. 
+ *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -1063,9 +1087,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( bool UseCopyEngine = false; ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; { // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -1107,7 +1131,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(urEventWait(NumEventsInWaitList, UrEventWaitList)); if (Queue->isInOrderQueue()) UR_CALL(urQueueFinish(Queue)); @@ -1136,7 +1160,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( reinterpret_cast(Queue), CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); - CommandList->second.append(reinterpret_cast(*Event)); + CommandList->second.append( + reinterpret_cast(*Event)); (*Event)->RefCount.increment(); const auto &ZeCommandList = CommandList->first; @@ -1174,16 +1199,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( size_t Size, ///< [in] size in bytes to be copied uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. 
) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::scoped_lock lock(Queue->Mutex); @@ -1206,16 +1233,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; std::ignore = Flags; // Lock automatically releases when this goes out of scope. @@ -1242,9 +1271,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; + ur_event_handle_legacy_t InternalEvent; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? 
OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_USM_PREFETCH, CommandList, IsInternal, false)); ZeEvent = (*Event)->ZeEvent; @@ -1273,9 +1302,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( size_t Size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t Advice, ///< [in] USM memory advice ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. + *UrOutEvent ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. ) { + auto OutEvent = Legacy(UrOutEvent); auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1298,9 +1328,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent{}; + ur_event_handle_legacy_t InternalEvent{}; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + ur_event_handle_legacy_t *Event = OutEvent ? OutEvent : &InternalEvent; UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_USM_ADVISE, CommandList, IsInternal, false)); ZeEvent = (*Event)->ZeEvent; @@ -1371,15 +1401,17 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( size_t Height, ///< [in] the height of columns to be copied. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t - *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before the kernel execution. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that no - ///< wait event. + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. 
If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *Event ///< [in,out][optional] return an event object that identifies - ///< this particular kernel execution instance. + *UrEvent ///< [in,out][optional] return an event object that identifies + ///< this particular kernel execution instance. ) { + auto EventWaitList = Legacy(UrEventWaitList); + auto Event = Legacy(UrEvent); auto Queue = this; ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; @@ -2285,15 +2317,18 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill( size_t Size, ///< [in] size in bytes to be set. Must be a multiple of ///< patternSize. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t * - EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that this - ///< command does not wait on any event to complete. - ur_event_handle_t *Event ///< [out][optional] return an event object that - ///< identifies this particular command instance. + const ur_event_handle_t + *UrEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. + ur_event_handle_t *UrEvent ///< [out][optional] return an event object that + ///< identifies this particular command instance. 
) { + auto EventWaitList = Legacy(UrEventWaitList); + auto Event = Legacy(UrEvent); auto Queue = this; std::scoped_lock Lock(Queue->Mutex); diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index b590165947..23b85ad16b 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -50,8 +50,8 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, ur_queue_handle_legacy_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent, + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent, bool PreferCopyEngine); ur_result_t enqueueMemCopyRectHelper( @@ -60,8 +60,8 @@ ur_result_t enqueueMemCopyRectHelper( ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine = false); + const ur_event_handle_legacy_t *EventWaitList, + ur_event_handle_legacy_t *OutEvent, bool PreferCopyEngine = false); struct ur_mem_handle_t_ : _ur_object { // Keeps the PI context of this memory handle. 
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index f467447753..7736577ee9 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -120,14 +120,14 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_legacy_t queue, return UR_RESULT_SUCCESS; } -void ur_completion_batches::append(ur_event_handle_t event) { +void ur_completion_batches::append(ur_event_handle_legacy_t event) { active->append(); event->completionBatch = active; } void ur_completion_batches::moveCompletedEvents( - ur_completion_batch_it it, std::vector &events, - std::vector &EventListToCleanup) { + ur_completion_batch_it it, std::vector &events, + std::vector &EventListToCleanup) { // This works by tagging all events belonging to a batch, and then removing // all events in a vector with the tag (iterator) of the active batch. // This could be optimized to remove a specific range of entries if we had a @@ -148,8 +148,8 @@ void ur_completion_batches::moveCompletedEvents( } ur_result_t ur_completion_batches::cleanup( - std::vector &events, - std::vector &EventListToCleanup) { + std::vector &events, + std::vector &EventListToCleanup) { bool cleaned = false; while (!sealed.empty()) { auto oldest_sealed = sealed.front(); @@ -190,8 +190,8 @@ ur_completion_batches::ur_completion_batches() { ur_result_t ur_completion_batches::tryCleanup( ur_queue_handle_legacy_t queue, ze_command_list_handle_t cmdlist, - std::vector &events, - std::vector &EventListToCleanup) { + std::vector &events, + std::vector &EventListToCleanup) { cleanup(events, EventListToCleanup); if (active->isFull()) { @@ -231,17 +231,18 @@ void ur_completion_batches::forceReset() { /// the call, in case of in-order queue it allows to cleanup all preceding /// events. /// @return PI_SUCCESS if successful, PI error code otherwise. 
-ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, - bool QueueLocked, bool QueueSynced, - ur_event_handle_t CompletedEvent) { +ur_result_t +CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, bool QueueLocked, + bool QueueSynced, + ur_event_handle_legacy_t CompletedEvent) { // Handle only immediate command lists here. if (!UrQueue || !UrQueue->UsingImmCmdLists) return UR_RESULT_SUCCESS; - ur_event_handle_t_ *UrCompletedEvent = - reinterpret_cast(CompletedEvent); + ur_event_handle_legacy_t_ *UrCompletedEvent = + reinterpret_cast(CompletedEvent); - std::vector EventListToCleanup; + std::vector EventListToCleanup; { std::unique_lock QueueLock(UrQueue->Mutex, std::defer_lock); @@ -252,7 +253,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, // commands so we can't do full cleanup. if (QueueLocked && (QueueSynced || (UrQueue->isInOrderQueue() && - (reinterpret_cast( + (reinterpret_cast( UrCompletedEvent) == UrQueue->LastCommandEvent || !UrQueue->LastCommandEvent)))) { UrQueue->LastCommandEvent = nullptr; @@ -317,7 +318,7 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { // nested locks, because event cleanup requires event to be locked. Nested // locks are hard to control and can cause deadlocks if mutexes are locked in // different order. - std::vector EventListToCleanup; + std::vector EventListToCleanup; // We check for command lists that have been already signalled, but have not // been added to the available list yet. Each command list has a fence @@ -556,7 +557,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( for (int I = 0; I < 10; ++I) { UR_CALL(Q->createCommandList(UseCopyEngine, CommandList)); // Immediately return them to the cache of available command-lists. 
- std::vector EventsUnused; + std::vector EventsUnused; UR_CALL(Q->resetCommandList(CommandList, true /* MakeAvailable */, EventsUnused)); } @@ -598,7 +599,7 @@ ur_result_t ur_queue_handle_legacy_t_::queueRetain() { ur_result_t ur_queue_handle_legacy_t_::queueRelease() { auto Queue = this; - std::vector EventListToCleanup; + std::vector EventListToCleanup; { std::scoped_lock Lock(Queue->Mutex); @@ -692,7 +693,8 @@ ur_result_t ur_queue_handle_legacy_t_::queueRelease() { UR_CALL(CleanupCompletedEvent(Event)); // This event was removed from the command list, so decrement ref count // (it was incremented when they were added to the command list). - UR_CALL(urEventReleaseInternal(reinterpret_cast(Event))); + UR_CALL(urEventReleaseInternal( + reinterpret_cast(Event))); } UR_CALL(urQueueReleaseInternal(Queue)); return UR_RESULT_SUCCESS; @@ -1351,11 +1353,11 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( auto Result = std::find_if( CommandList->second.EventList.begin(), CommandList->second.EventList.end(), - [](ur_event_handle_t E) { return E->hasExternalRefs(); }); + [](ur_event_handle_legacy_t E) { return E->hasExternalRefs(); }); if (Result != CommandList->second.EventList.end()) { // Create a "proxy" host-visible event. // - ur_event_handle_t HostVisibleEvent; + ur_event_handle_legacy_t HostVisibleEvent; auto Res = createEventAndAssociateQueue( reinterpret_cast(this), &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, @@ -1374,7 +1376,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( if (!Event->HostVisibleEvent) { Event->HostVisibleEvent = - reinterpret_cast(HostVisibleEvent); + reinterpret_cast(HostVisibleEvent); HostVisibleEvent->RefCount.increment(); } } @@ -1461,7 +1463,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( this->Healthy = false; // Reset Command List and erase the Fence forcing the user to resubmit // their commands. 
- std::vector EventListToCleanup; + std::vector EventListToCleanup; resetCommandList(CommandList, true, EventListToCleanup, false); CleanupEventListFromResetCmdList(EventListToCleanup, true /* QueueLocked */); @@ -1495,12 +1497,12 @@ ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( (CommandList->first, LastCommandEvent->ZeEvent)); } - // Create new ur_event_handle_t but with the same ze_event_handle_t. We are - // going to use this ur_event_handle_t for the next command with discarded - // event. - ur_event_handle_t_ *UREvent; + // Create new ur_event_handle_legacy_t but with the same ze_event_handle_t. + // We are going to use this ur_event_handle_legacy_t for the next command + // with discarded event. + ur_event_handle_legacy_t_ *UREvent; try { - UREvent = new ur_event_handle_t_( + UREvent = new ur_event_handle_legacy_t_( LastCommandEvent->ZeEvent, LastCommandEvent->ZeEventPool, reinterpret_cast(Context), UR_EXT_COMMAND_TYPE_USER, true); @@ -1511,16 +1513,18 @@ ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( } if (LastCommandEvent->isHostVisible()) - UREvent->HostVisibleEvent = reinterpret_cast(UREvent); + UREvent->HostVisibleEvent = + reinterpret_cast(UREvent); - UR_CALL(addEventToQueueCache(reinterpret_cast(UREvent))); + UR_CALL(addEventToQueueCache( + reinterpret_cast(UREvent))); } return UR_RESULT_SUCCESS; } -ur_result_t -ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) { +ur_result_t ur_queue_handle_legacy_t_::addEventToQueueCache( + ur_event_handle_legacy_t Event) { if (!Event->IsMultiDevice) { auto EventCachesMap = Event->isHostVisible() ? 
&EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; @@ -1536,7 +1540,8 @@ ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) { return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::active_barriers::add(ur_event_handle_t &Event) { +void ur_queue_handle_legacy_t_::active_barriers::add( + ur_event_handle_legacy_t &Event) { Event->RefCount.increment(); Events.push_back(Event); } @@ -1649,7 +1654,8 @@ bool ur_queue_handle_legacy_t_::isInOrderQueue() const { // Helper function to perform the necessary cleanup of the events from reset cmd // list. ur_result_t CleanupEventListFromResetCmdList( - std::vector &EventListToCleanup, bool QueueLocked) { + std::vector &EventListToCleanup, + bool QueueLocked) { for (auto &Event : EventListToCleanup) { // We don't need to synchronize the events since the fence associated with // the command list was synchronized. @@ -1765,10 +1771,10 @@ ur_result_t ur_queue_handle_legacy_t_::synchronize() { return UR_RESULT_SUCCESS; } -ur_event_handle_t +ur_event_handle_legacy_t ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, bool HostVisible) { - std::list *Cache; + std::list *Cache; if (!IsMultiDevice) { auto Device = this->Device; @@ -1790,7 +1796,7 @@ ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, // If there are two events then return an event from the beginning of the list // since event of the last command is added to the end of the list. auto It = Cache->begin(); - ur_event_handle_t RetEvent = *It; + ur_event_handle_legacy_t RetEvent = *It; Cache->erase(It); return RetEvent; } @@ -1801,7 +1807,7 @@ ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, // this batch. bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList) { + const ur_event_handle_legacy_t *EventWaitList) { auto &CommandBatch = UseCopyEngine ? 
Queue->CopyCommandBatch : Queue->ComputeCommandBatch; // First see if there is an command-list open for batching commands @@ -1830,9 +1836,10 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // a command list batch. The signal event will be appended at the end of the // batch to be signalled at the end of the command list. ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, - ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, + ze_event_handle_t *ZeEvent, + ur_event_handle_legacy_t *Event, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, + const ur_event_handle_legacy_t *EventWaitList, ze_command_queue_handle_t ZeQueue) { if (!UrL0OutOfOrderIntegratedSignalEvent && Queue->Device->isIntegrated() && eventCanBeBatched(Queue, UseCopyEngine, NumEventsInWaitList, @@ -1847,11 +1854,11 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, return UR_RESULT_SUCCESS; } -// This helper function creates a ur_event_handle_t and associate a +// This helper function creates a ur_event_handle_legacy_t and associate a // ur_queue_handle_t. Note that the caller of this function must have acquired // lock on the Queue that is passed in. // \param Queue ur_queue_handle_t to associate with a new event. -// \param Event a pointer to hold the newly created ur_event_handle_t +// \param Event a pointer to hold the newly created ur_event_handle_legacy_t // \param CommandType various command type determined by the caller // \param CommandList is the command list where the event is added // \param IsInternal tells if the event is internal, i.e. visible in the L0 @@ -1861,7 +1868,7 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // \param HostVisible tells if the event must be created in the // host-visible pool. If not set then this function will decide. 
ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, - ur_event_handle_t *Event, + ur_event_handle_legacy_t *Event, ur_command_t CommandType, ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice, @@ -1888,9 +1895,9 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, (*Event)->IsMultiDevice = IsMultiDevice; (*Event)->CommandList = CommandList; // Discarded event doesn't own ze_event, it is used by multiple - // ur_event_handle_t objects. We destroy corresponding ze_event by releasing - // events from the events cache at queue destruction. Event in the cache owns - // the Level Zero event. + // ur_event_handle_legacy_t objects. We destroy corresponding ze_event by + // releasing events from the events cache at queue destruction. Event in the + // cache owns the Level Zero event. if (IsInternal) (*Event)->OwnNativeHandle = false; @@ -1901,8 +1908,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, } // We need to increment the reference counter here to avoid ur_queue_handle_t - // being released before the associated ur_event_handle_t is released because - // urEventRelease requires access to the associated ur_queue_handle_t. + // being released before the associated ur_event_handle_legacy_t is released + // because urEventRelease requires access to the associated ur_queue_handle_t. // In urEventRelease, the reference counter of the Queue is decremented // to release it. Queue->RefCount.increment(); @@ -1958,7 +1965,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( // participating in the discarded events reset/reuse logic, but // with no host-visibility since it is not going to be waited // from the host. 
- ur_event_handle_t Event; + ur_event_handle_legacy_t Event; UR_CALL(createEventAndAssociateQueue( reinterpret_cast(this), &Event, UR_EXT_COMMAND_TYPE_USER, CommandList, @@ -1988,7 +1995,8 @@ ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) { ur_result_t ur_queue_handle_legacy_t_::resetCommandList( ur_command_list_ptr_t CommandList, bool MakeAvailable, - std::vector &EventListToCleanup, bool CheckStatus) { + std::vector &EventListToCleanup, + bool CheckStatus) { bool UseCopyEngine = CommandList->second.isCopy(this); // Immediate commandlists do not have an associated fence. @@ -2016,7 +2024,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetCommandList( } else if (!isDiscardEvents()) { // If events in the queue are discarded then we can't check their status. // Helper for checking of event completion - auto EventCompleted = [](ur_event_handle_t Event) -> bool { + auto EventCompleted = [](ur_event_handle_legacy_t Event) -> bool { std::scoped_lock EventLock(Event->Mutex); ze_result_t ZeResult = Event->Completed @@ -2096,15 +2104,15 @@ bool ur_command_list_info_t::isCopy(ur_queue_handle_legacy_t Queue) const { .ZeOrdinal; } -void ur_command_list_info_t::append(ur_event_handle_t Event) { +void ur_command_list_info_t::append(ur_event_handle_legacy_t Event) { if (completions) { completions->append(Event); } EventList.push_back(Event); } -ur_command_list_ptr_t -ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) { +ur_command_list_ptr_t ur_queue_handle_legacy_t_::eventOpenCommandList( + ur_event_handle_legacy_t Event) { using IsCopy = bool; if (UsingImmCmdLists) { @@ -2306,7 +2314,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, ActiveBarriers.add(Event); } - ur_event_handle_t Event = nullptr; + ur_event_handle_legacy_t Event = nullptr; if (auto Res = createEventAndAssociateQueue( reinterpret_cast(this), &Event, UR_EXT_COMMAND_TYPE_USER, CmdList, diff --git 
a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 97ddcf014c..93edaa3d89 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -31,6 +31,9 @@ struct ur_queue_handle_legacy_t_; using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; +struct ur_event_handle_legacy_t_; +using ur_event_handle_legacy_t = ur_event_handle_legacy_t_ *; + extern "C" { ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue); } // extern "C" @@ -88,7 +91,7 @@ struct ur_completion_batch { // Internal barrier event that is signaled on completion of the batched // events. - ur_event_handle_t barrierEvent; + ur_event_handle_legacy_t barrierEvent; // Current batch state. Don't use directly. state st; @@ -117,16 +120,16 @@ struct ur_completion_batches { // returned to indicate that there are no batches available. // This is safe, but will increase how many events are associated // with the active batch. - ur_result_t tryCleanup(ur_queue_handle_legacy_t queue, - ze_command_list_handle_t cmdlist, - std::vector &EventList, - std::vector &EventListToCleanup); + ur_result_t + tryCleanup(ur_queue_handle_legacy_t queue, ze_command_list_handle_t cmdlist, + std::vector &EventList, + std::vector &EventListToCleanup); // Adds an event to the the active batch. // Ideally, all events that are appended here are then provided in the // vector for cleanup. Otherwise the event batch will simply ignore // missing events when it comes time for cleanup. - void append(ur_event_handle_t event); + void append(ur_event_handle_legacy_t event); // Resets all the batches without waiting for event completion. // Only safe when the command list was fully synchronized through @@ -137,13 +140,15 @@ struct ur_completion_batches { // Checks the state of all previously sealed batches. If any are complete, // moves the associated events from the EventList to EventListToCleanup, // and then resets the batch for reuse. 
- ur_result_t cleanup(std::vector &EventList, - std::vector &EventListToCleanup); + ur_result_t + cleanup(std::vector &EventList, + std::vector &EventListToCleanup); // Moves the completed events from EventList to EventListToCleanup. - void moveCompletedEvents(ur_completion_batch_it it, - std::vector &EventList, - std::vector &EventListToCleanup); + void moveCompletedEvents( + ur_completion_batch_it it, + std::vector &EventList, + std::vector &EventListToCleanup); // Find or creates an empty batch. This might fail if there are now empty // batches and a batch limit has been reached. @@ -158,7 +163,7 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue); ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, bool QueueLocked = false, bool QueueSynced = false, - ur_event_handle_t CompletedEvent = nullptr); + ur_event_handle_legacy_t CompletedEvent = nullptr); // Structure describing the specific use of a command-list in a queue. // This is because command-lists are re-used across multiple queues @@ -219,9 +224,9 @@ struct ur_command_list_info_t { // completion. // TODO: use this for optimizing events in the same command-list, e.g. // only have last one visible to the host. - std::vector EventList; + std::vector EventList; size_t size() const { return EventList.size(); } - void append(ur_event_handle_t Event); + void append(ur_event_handle_legacy_t Event); }; // The map type that would track all command-lists in a queue. @@ -556,7 +561,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // this queue. this is used to add dependency with the last command to add // in-order semantics and updated with the latest event each time a new // command is enqueued. - ur_event_handle_t LastCommandEvent = nullptr; + ur_event_handle_legacy_t LastCommandEvent = nullptr; // Indicates if we own the ZeCommandQueue or it came from interop that // asked to not transfer the ownership to SYCL RT. 
@@ -599,11 +604,11 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // A helper structure to keep active barriers of the queue. // It additionally manages ref-count of events in this list. struct active_barriers { - std::vector<ur_event_handle_t> Events; - void add(ur_event_handle_t &Event); + std::vector<ur_event_handle_legacy_t> Events; + void add(ur_event_handle_legacy_t &Event); ur_result_t clear(); bool empty() { return Events.empty(); } - std::vector<ur_event_handle_t> &vector() { return Events; } + std::vector<ur_event_handle_legacy_t> &vector() { return Events; } }; // A collection of currently active barriers. // These should be inserted into a command list whenever an available command @@ -676,7 +681,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // requested type of event. Each list contains events which can be reused // inside all command lists in the queue as described in the 2-event model. // Leftover events in the cache are relased at the queue destruction. - std::vector<std::list<ur_event_handle_t>> EventCaches{2}; + std::vector<std::list<ur_event_handle_legacy_t>> EventCaches{2}; std::vector<std::unordered_map<ur_device_handle_t, size_t>> EventCachesDeviceMap{2}; @@ -691,7 +696,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // case the event will mark this for deletion when the queue sees fit. bool EventHasDied = false; }; - std::map<ur_event_handle_t, end_time_recording> EndTimeRecordings; + std::map<ur_event_handle_legacy_t, end_time_recording> EndTimeRecordings; // Clear the end time recording timestamps entries. void clearEndTimeRecordings(); @@ -737,7 +742,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // not used by any command but its ZeEvent is used by many ur_event_handle_t // objects. Commands to wait and reset ZeEvent must be submitted to the queue // before calling this method. - ur_result_t addEventToQueueCache(ur_event_handle_t Event); + ur_result_t addEventToQueueCache(ur_event_handle_legacy_t Event); // Returns true if any commands for this queue are allowed to // be batched together.
@@ -769,8 +774,8 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // two times in a row and have to do round-robin between two events. Otherwise // it picks an event from the beginning of the cache and returns it. Event // from the last command is always appended to the end of the list. - ur_event_handle_t getEventFromQueueCache(bool IsMultiDevice, - bool HostVisible); + ur_event_handle_legacy_t getEventFromQueueCache(bool IsMultiDevice, + bool HostVisible); // Returns true if an OpenCommandList has commands that need to be submitted. // If IsCopy is 'true', then the OpenCommandList containing copy commands is @@ -834,11 +839,11 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { /// @return PI_SUCCESS if successful, PI error code otherwise. ur_result_t resetCommandList(ur_command_list_ptr_t CommandList, bool MakeAvailable, - std::vector<ur_event_handle_t> &EventListToCleanup, + std::vector<ur_event_handle_legacy_t> &EventListToCleanup, bool CheckStatus = true); // Gets the open command containing the event, or CommandListMap.end() - ur_command_list_ptr_t eventOpenCommandList(ur_event_handle_t Event); + ur_command_list_ptr_t eventOpenCommandList(ur_event_handle_legacy_t Event); // Return the queue group to use based on standard/immediate commandlist mode, // and if immediate mode, the thread-specific group.
@@ -883,18 +888,9 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { size_t getImmdCmmdListsEventCleanupThreshold(); }; -template <typename QueueT> QueueT GetQueue(ur_queue_handle_t Queue) { - if (!Queue) - return nullptr; - auto *Q = dynamic_cast<QueueT>(Queue); - if (!Q) { - throw UR_RESULT_ERROR_INVALID_QUEUE; - } - return Q; -} - +// Get legacy implementation static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { - return GetQueue<ur_queue_handle_legacy_t>(Queue); + return GetImpl<ur_queue_handle_legacy_t>(Queue); } // This helper function creates a ur_event_handle_t and associate a @@ -910,12 +906,11 @@ static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { // multiple devices. // \param ForceHostVisible tells if the event must be created in // the host-visible pool -ur_result_t -createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, - ur_event_handle_t *Event, ur_command_t CommandType, - ur_command_list_ptr_t CommandList, bool IsInternal, - bool IsMultiDevice, - std::optional<bool> HostVisible = std::nullopt); +ur_result_t createEventAndAssociateQueue( + ur_queue_handle_legacy_t Queue, ur_event_handle_legacy_t *Event, + ur_command_t CommandType, ur_command_list_ptr_t CommandList, + bool IsInternal, bool IsMultiDevice, + std::optional<bool> HostVisible = std::nullopt); // This helper function checks to see if an event for a command can be included // at the end of a command list batch. This will only be true if the event does @@ -923,7 +918,7 @@ createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, // this batch. bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList); + const ur_event_handle_legacy_t *EventWaitList); // This helper function checks to see if a signal event at the end of a command // should be set. If the Queue is out of order and the command has no @@ -931,13 +926,14 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // a command list batch.
The signal event will be appended at the end of the // batch to be signalled at the end of the command list. ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, - ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, + ze_event_handle_t *ZeEvent, + ur_event_handle_legacy_t *Event, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, + const ur_event_handle_legacy_t *EventWaitList, ze_command_queue_handle_t ZeQueue); // Helper function to perform the necessary cleanup of the events from reset cmd // list. ur_result_t CleanupEventListFromResetCmdList( - std::vector<ur_event_handle_t> &EventListToCleanup, + std::vector<ur_event_handle_legacy_t> &EventListToCleanup, bool QueueLocked = false);