From b81670000f9d02de178f3dfcd5dd4182ccdac5e2 Mon Sep 17 00:00:00 2001
From: "Neil R. Spruit"
Date: Fri, 26 Jul 2024 10:32:24 -0700
Subject: [PATCH] [L0] Add check for Intel Flex/Arc for disabling use of copy engines.

Signed-off-by: Neil R. Spruit
---
Notes (review commentary; not part of the commit message):
- Adds ur_device_handle_t_::isDG2(), which is true when
  (deviceId & 0xff00) == 0x5600, and IsSharedPointer(), which queries
  zeMemGetAllocProperties and compares the reported type against
  ZE_MEMORY_TYPE_SHARED.
- In urCommandBufferAppendUSMMemcpyExp, enqueueDeviceGlobalVariableWrite,
  enqueueDeviceGlobalVariableRead, enqueueUSMMemcpy and enqueueUSMMemcpy2D,
  PreferCopyEngine is cleared on DG2 when a checked pointer is shared.
  UseCopyEngineForD2DCopy is OR-ed in afterwards, so that option can still
  turn the copy engine preference back on.
- NOTE(review): IsSharedPointer returns bool but wraps the query in
  ZE2UR_CALL; confirm what that macro returns on failure so that an error is
  not reported to callers as "shared".

 source/adapters/level_zero/command_buffer.cpp |  7 ++++++
 source/adapters/level_zero/device.hpp         |  3 +++
 source/adapters/level_zero/kernel.cpp         | 10 ++++++++
 source/adapters/level_zero/memory.cpp         | 25 +++++++++++++++++++
 source/adapters/level_zero/memory.hpp         |  1 +
 5 files changed, 46 insertions(+)

diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp
index 3b4a91fc0a..5c4790708b 100644
--- a/source/adapters/level_zero/command_buffer.cpp
+++ b/source/adapters/level_zero/command_buffer.cpp
@@ -921,6 +921,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
 
   bool PreferCopyEngine = !IsDevicePointer(CommandBuffer->Context, Src) ||
                           !IsDevicePointer(CommandBuffer->Context, Dst);
+  // For better performance, Copy Engines are not preferred given Shared
+  // pointers on DG2.
+  if (CommandBuffer->Device->isDG2() &&
+      (IsSharedPointer(CommandBuffer->Context, Src) ||
+       IsSharedPointer(CommandBuffer->Context, Dst))) {
+    PreferCopyEngine = false;
+  }
   PreferCopyEngine |= UseCopyEngineForD2DCopy;
 
   return enqueueCommandBufferMemCopyHelper(
diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp
index 4672a0a4f6..694906a9db 100644
--- a/source/adapters/level_zero/device.hpp
+++ b/source/adapters/level_zero/device.hpp
@@ -189,6 +189,9 @@ struct ur_device_handle_t_ : _ur_object {
            (ZeDeviceProperties->deviceId & 0xff0) == 0xb60;
   }
 
+  // Checks if this GPU is an Intel Flex GPU or Intel Arc Alchemist
+  bool isDG2() { return (ZeDeviceProperties->deviceId & 0xff00) == 0x5600; }
+
   bool isIntegrated() {
     return (ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
   }
diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp
index cb020395ed..8e61f99294 100644
--- a/source/adapters/level_zero/kernel.cpp
+++ b/source/adapters/level_zero/kernel.cpp
@@ -613,6 +613,11 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite(
   // Copy engine is preferred only for host to device transfer.
   // Device to device transfers run faster on compute engines.
   bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src);
+  // For better performance, Copy Engines are not preferred given Shared
+  // pointers on DG2.
+  if (Queue->Device->isDG2() && IsSharedPointer(Queue->Context, Src)) {
+    PreferCopyEngine = false;
+  }
 
   // Temporary option added to use copy engine for D2D copy
   PreferCopyEngine |= UseCopyEngineForD2DCopy;
@@ -663,6 +668,11 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead(
   // Copy engine is preferred only for host to device transfer.
   // Device to device transfers run faster on compute engines.
   bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Dst);
+  // For better performance, Copy Engines are not preferred given Shared
+  // pointers on DG2.
+  if (Queue->Device->isDG2() && IsSharedPointer(Queue->Context, Dst)) {
+    PreferCopyEngine = false;
+  }
 
   // Temporary option added to use copy engine for D2D copy
   PreferCopyEngine |= UseCopyEngineForD2DCopy;
diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp
index 95650a7b94..585a10ef4f 100644
--- a/source/adapters/level_zero/memory.cpp
+++ b/source/adapters/level_zero/memory.cpp
@@ -42,6 +42,19 @@ bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr) {
   return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_DEVICE);
 }
 
+// Helper function to check if a pointer is a shared pointer.
+bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
+  ze_device_handle_t ZeDeviceHandle;
+  ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
+
+  // Query memory type of the pointer
+  ZE2UR_CALL(zeMemGetAllocProperties,
+             (Context->ZeContext, Ptr, &ZeMemoryAllocationProperties,
+              &ZeDeviceHandle));
+
+  return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
+}
+
 // Shared by all memory read/write/copy PI interfaces.
 // PI interfaces must have queue's and destination buffer's mutexes locked for
 // exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1191,6 +1204,12 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy(
   // (versus compute engine).
   bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
                           !IsDevicePointer(Queue->Context, Dst);
+  // For better performance, Copy Engines are not preferred given Shared
+  // pointers on DG2.
+  if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
+                                 IsSharedPointer(Queue->Context, Dst))) {
+    PreferCopyEngine = false;
+  }
 
   // Temporary option added to use copy engine for D2D copy
   PreferCopyEngine |= UseCopyEngineForD2DCopy;
@@ -1390,6 +1409,12 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D(
   // (versus compute engine).
   bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
                           !IsDevicePointer(Queue->Context, Dst);
+  // For better performance, Copy Engines are not preferred given Shared
+  // pointers on DG2.
+  if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
+                                 IsSharedPointer(Queue->Context, Dst))) {
+    PreferCopyEngine = false;
+  }
 
   // Temporary option added to use copy engine for D2D copy
   PreferCopyEngine |= UseCopyEngineForD2DCopy;
diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp
index b590165947..43d548f16b 100644
--- a/source/adapters/level_zero/memory.hpp
+++ b/source/adapters/level_zero/memory.hpp
@@ -32,6 +32,7 @@ using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *;
 struct ur_device_handle_t_;
 
 bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr);
+bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr);
 
 // This is an experimental option to test performance of device to device copy
 // operations on copy engines (versus compute engine)