diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h index 13eca730a9295..327fadfcd4acd 100644 --- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h +++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h @@ -54,12 +54,14 @@ class Interface { void **TgtPtrBegin, size_t Size, void *Code); /// Top-level function for invoking callback before data submit - void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, void *Code); + void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code); /// Top-level function for invoking callback after data submit - void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, void *Code); + void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, size_t Size, + void *Code); /// Top-level function for invoking callback before device data deallocation void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); @@ -68,12 +70,14 @@ class Interface { void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); /// Top-level function for invoking callback before data retrieve - void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, void *Code); + void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code); /// Top-level function for invoking callback after data retrieve - void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, void *Code); + void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code); /// Top-level function for invoking callback before kernel dispatch void beginTargetSubmit(unsigned int 
NumTeams = 1); diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp index 66435d2a4fe64..f285843e39f38 100644 --- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp +++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp @@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, endTargetDataOperation(); } -void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, - void *HstPtrBegin, size_t Size, - void *Code) { +void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code) { beginTargetDataOperation(); if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. Invoke the tool supplied data op EMI // callback ompt_callback_target_data_op_emi_fn( ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId, - ompt_target_data_transfer_to_device, HstPtrBegin, - /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, - Code); + ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId, + DstPtrBegin, DstDeviceId, Size, Code); } else if (ompt_callback_target_data_op_fn) { // HostOpId is set by the runtime HostOpId = createOpId(); // Invoke the tool supplied data op callback ompt_callback_target_data_op_fn( TargetData.value, HostOpId, ompt_target_data_transfer_to_device, - HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, - DeviceId, Size, Code); + SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code); } } -void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, - void *HstPtrBegin, size_t Size, - void *Code) { +void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code) { // Only EMI callback handles end scope if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. 
Invoke the tool supplied data op EMI // callback ompt_callback_target_data_op_emi_fn( ompt_scope_end, TargetTaskData, &TargetData, &HostOpId, - ompt_target_data_transfer_to_device, HstPtrBegin, - /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, - Code); + ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId, + DstPtrBegin, DstDeviceId, Size, Code); } endTargetDataOperation(); } @@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, endTargetDataOperation(); } -void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, - void *Code) { +void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void *Code) { beginTargetDataOperation(); if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. Invoke the tool supplied data op EMI // callback ompt_callback_target_data_op_emi_fn( ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId, - ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId, - HstPtrBegin, - /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code); + ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId, + DstPtrBegin, DstDeviceId, Size, Code); } else if (ompt_callback_target_data_op_fn) { // HostOpId is set by the runtime HostOpId = createOpId(); // Invoke the tool supplied data op callback ompt_callback_target_data_op_fn( TargetData.value, HostOpId, ompt_target_data_transfer_from_device, - TgtPtrBegin, DeviceId, HstPtrBegin, - /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code); + SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code); } } -void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, - void *TgtPtrBegin, size_t Size, - void *Code) { +void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin, + int64_t DstDeviceId, void *DstPtrBegin, + size_t Size, void 
*Code) { // Only EMI callback handles end scope if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. Invoke the tool supplied data op EMI // callback ompt_callback_target_data_op_emi_fn( ompt_scope_end, TargetTaskData, &TargetData, &HostOpId, - ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId, - HstPtrBegin, - /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code); + ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId, + DstPtrBegin, DstDeviceId, Size, Code); } endTargetDataOperation(); } diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 5fe3f508b739c..3345277d91d3a 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, OMPT_IF_BUILT( InterfaceRAII TargetDataSubmitRAII( RegionInterface.getCallbacks(), - DeviceID, TgtPtrBegin, HstPtrBegin, Size, + omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size, /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) @@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, OMPT_IF_BUILT( InterfaceRAII TargetDataRetrieveRAII( RegionInterface.getCallbacks(), - DeviceID, HstPtrBegin, TgtPtrBegin, Size, + DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size, /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) if (!RTL->data_retrieve_async || !RTL->synchronize) @@ -185,6 +185,17 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, // Copy data from current device to destination device directly int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo) { + /// RAII to establish tool anchors before and after data exchange + /// Note: Despite the fact that this is a data exchange, we use 'from_device' + /// operation enum (w.r.t. 
ompt_target_data_op_t) as there is currently + /// no better alternative. It is still possible to distinguish this + /// scenario from a real data retrieve by checking if both involved + /// device numbers are less than omp_get_num_devices(). + OMPT_IF_BUILT( + InterfaceRAII TargetDataExchangeRAII( + RegionInterface.getCallbacks(), + RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size, + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) { assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h index 1f9b7c177b286..95437d9cdcfb1 100644 --- a/openmp/libomptarget/test/ompt/callbacks.h +++ b/openmp/libomptarget/test/ompt/callbacks.h @@ -81,11 +81,14 @@ static void on_ompt_callback_target_data_op_emi( assert(codeptr_ra != 0 && "Unexpected null codeptr"); if (endpoint == ompt_scope_begin) *host_op_id = next_op_id++; + // target_task_data may be null, avoid dereferencing it + uint64_t target_task_data_value = + (target_task_data) ? 
target_task_data->value : 0; printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p " "(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p " "src_device_num=%d " "dest=%p dest_device_num=%d bytes=%lu code=%p\n", - endpoint, optype, target_task_data, target_task_data->value, + endpoint, optype, target_task_data, target_task_data_value, target_data, target_data->value, host_op_id, *host_op_id, src_addr, src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); } diff --git a/openmp/libomptarget/test/ompt/target_memcpy.c b/openmp/libomptarget/test/ompt/target_memcpy.c index 444f4b7bdbda3..80a8d6a4b32e5 100644 --- a/openmp/libomptarget/test/ompt/target_memcpy.c +++ b/openmp/libomptarget/test/ompt/target_memcpy.c @@ -33,6 +33,10 @@ int main() { if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host)) abort(); + // D2D transfer + if (omp_target_memcpy(dev_ptr, dev_ptr, sizeof(int), 0, 0, dev, dev)) + abort(); + // D2H transfer if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev)) abort(); @@ -46,16 +50,25 @@ int main() { // clang-format off /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]] +/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]] /// CHECK-NOT: code=(nil) -/// CHECK: code=[[CODE1:.*]] +/// CHECK: code=[[CODE1:0x[0-f]+]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] /// CHECK-NOT: code=(nil) /// CHECK-NOT: code=[[CODE1]] -/// CHECK: code=[[CODE2:.*]] +/// CHECK: code=[[CODE2:0x[0-f]+]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]] /// CHECK-NOT: code=(nil) /// CHECK-NOT: code=[[CODE2]] -/// CHECK: code=[[CODE3:.*]] -/// CHECK: Callback 
DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: code=[[CODE3:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] /// CHECK-NOT: code=(nil) /// CHECK-NOT: code=[[CODE3]] +/// CHECK: code=[[CODE4:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE4]] diff --git a/openmp/libomptarget/test/ompt/target_memcpy_emi.c b/openmp/libomptarget/test/ompt/target_memcpy_emi.c new file mode 100644 index 0000000000000..5347f38b87b6f --- /dev/null +++ b/openmp/libomptarget/test/ompt/target_memcpy_emi.c @@ -0,0 +1,85 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +/* + * Verify all three data transfer directions: H2D, D2D and D2H + */ + +#include <assert.h> +#include <omp.h> +#include <stdlib.h> + +#include "callbacks.h" +#include "register_emi.h" + +int main(void) { + int NumDevices = omp_get_num_devices(); + assert(NumDevices > 0 && "No device(s) present."); + int Device = omp_get_default_device(); + int Host = omp_get_initial_device(); + // Note: Zero value depicts an OFFLOAD_SUCCESS + int Status; + + printf("Allocating Memory on Device\n"); + int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device); + assert(DevPtr && "Could not allocate memory on device."); + int *HstPtr = (int *)malloc(sizeof(int)); + *HstPtr = 42; + + printf("Testing: Host to Device\n"); + Status = omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host); + assert(Status == 0 && "H2D memory copy operation failed.\n"); + + printf("Testing: Device to Device\n"); + Status = omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device); + 
assert(Status == 0 && "D2D memory copy operation failed.\n"); + + printf("Testing: Device to Host\n"); + Status = omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device); + assert(Status == 0 && "D2H memory copy operation failed.\n"); + + printf("Checking Correctness\n"); + assert(*HstPtr == 42); + + printf("Freeing Memory on Device\n"); + free(HstPtr); + omp_target_free(DevPtr, Device); + + return 0; +} + +// clang-format off + +/// CHECK: Callback Init: + +/// CHECK: Allocating Memory on Device +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]] +/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]] +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] + +/// CHECK: Testing: Host to Device +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] + +/// CHECK: Testing: Device to Device +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]] +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]] + +/// CHECK: Testing: Device to Host +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] + +/// CHECK: Checking Correctness + +/// CHECK: Freeing Memory on Device +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]] +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]] + +/// CHECK: Callback Fini: + +// clang-format on