diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index cd76d88618be4..56a4f5ba4242c 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -447,6 +447,10 @@ struct DeviceTy { /// - Data allocation failed; /// - The user tried to do an illegal mapping; /// - Data transfer issue fails. + /// If unified shared memory is enabled the data will not be transferred to + /// the device and will be used from the host. Data will be added to the + /// mapping table to allow checks to happen even when in unified shared + /// memory. TargetPointerResultTy getTargetPointer( HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 8a2fe4620b39c..a8e93adcab5fc 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -268,6 +268,27 @@ TargetPointerResultTy DeviceTy::getTargetPointer( LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction, (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); LR.TPR.TargetPointer = (void *)Ptr; + + // When the target pointer is retrieved again, then the condition for this + // branch can be true hence preventing the unified shared memory to be + // taken at all. This ensures that the IsHostPointer and IsPresent flags + // are correctly set even in that situation. + if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !HasCloseModifier && !LR.TPR.Flags.IsHostPointer) { + // This is a host pointer and is not present if the pointers match: + if (LR.TPR.getEntry()->TgtPtrBegin == LR.TPR.getEntry()->HstPtrBegin) { + LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsHostPointer = true; + } + + // Catch the case where incoming HstPtrBegin is not consistent with the + // entry HstPtrBegin. + if (LR.TPR.Flags.IsHostPointer && + ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin) != 0) { + assert(false && + "Incoming HstPtrBegin different from entry HstPtrBegin"); + } + } } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) { // Explicit extension of mapped data - not allowed. MESSAGE("explicit extension not allowed: host address specified is " DPxMOD @@ -289,13 +310,38 @@ TargetPointerResultTy DeviceTy::getTargetPointer( // In addition to the mapping rules above, the close map modifier forces the // mapping of the variable to the device. if (Size) { - DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " - "memory\n", - DPxPTR((uintptr_t)HstPtrBegin), Size); - LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsNewEntry = true; + assert(TgtPadding == 0 && "TgtPadding must always be zero in USM mode"); + uintptr_t TgtPtrBegin = (uintptr_t)HstPtrBegin + TgtPadding; + LR.TPR.setEntry( + HDTTMap + ->emplace(new HostDataToTargetTy( + (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, + (uintptr_t)HstPtrBegin + Size, (uintptr_t)HstPtrBegin, + TgtPtrBegin, HasHoldModifier, HstPtrName)) + .first->HDTT); + INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, + "Creating new map entry ONLY with HstPtrBase=" DPxMOD + ", HstPtrBegin=" DPxMOD ", TgtAllocBegin=" DPxMOD + ", TgtPtrBegin=" DPxMOD + ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n", + DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(HstPtrBegin), + DPxPTR(TgtPtrBegin), Size, + LR.TPR.getEntry()->dynRefCountToStr().c_str(), + LR.TPR.getEntry()->holdRefCountToStr().c_str(), + (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); LR.TPR.Flags.IsHostPointer = true; + + // The following assert should catch any case in which the pointers + // do not match to understand if this case can ever happen. + assert((uintptr_t)HstPtrBegin == TgtPtrBegin && + "Pointers must always match"); + + // If the above assert is ever hit the following should be changed to = + // TgtPtrBegin LR.TPR.TargetPointer = HstPtrBegin; } + LR.TPR.Flags.IsPresent = false; } else if (HasPresentModifier) { DP("Mapping required by 'present' map type modifier does not exist for " "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n", @@ -444,6 +490,29 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount, LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction, LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction); LR.TPR.TargetPointer = (void *)TP; + + // If this entry is not marked as being host pointer (the way the + // implementation works today this is never true, mistake?) then we + // have to check if this is a host pointer or not. This is a host pointer + // if the host address matches the target address. + if ((PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) && + !LR.TPR.Flags.IsHostPointer) { + // If addresses match it means that we are dealing with a host pointer + // which has to be marked as one and present flag reset: + if (LR.TPR.getEntry()->TgtPtrBegin == LR.TPR.getEntry()->HstPtrBegin) { + LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsHostPointer = true; + } + + // We want to catch the case where (uintptr_t)HstPtrBegin and + // LR.TPR.getEntry()->HstPtrBegin are not the same when LR is a host + // pointer. This case should never happen. + if (LR.TPR.Flags.IsHostPointer && + ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin) != 0) { + assert(false && + "Incoming HstPtrBegin different from entry HstPtrBegin"); + } + } } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 65f2a49abc714..020b87e989f72 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -788,7 +788,9 @@ postProcessingTargetDataEnd(DeviceTy *Device, int Ret = OFFLOAD_SUCCESS; for (auto &[HstPtrBegin, DataSize, ArgType, TPR] : EntriesInfo) { - bool DelEntry = !TPR.isHostPointer(); + // Delete entry from the mapping table even when we are dealing with a + // host pointer. + bool DelEntry = true; // If the last element from the mapper (for end transfer args comes in // reverse order), do not remove the partial entry, the parent struct still @@ -846,10 +848,12 @@ postProcessingTargetDataEnd(DeviceTy *Device, Ret = Device->eraseMapEntry(HDTTMap, Entry, DataSize); // Entry is already remove from the map, we can unlock it now. HDTTMap.destroy(); - Ret |= Device->deallocTgtPtrAndEntry(Entry, DataSize); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Deallocating data from device failed.\n"); - break; + if (!TPR.Flags.IsHostPointer) { + Ret |= Device->deallocTgtPtrAndEntry(Entry, DataSize); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Deallocating data from device failed.\n"); + break; + } } } @@ -908,78 +912,92 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, Device.getTgtPtrBegin(HstPtrBegin, DataSize, UpdateRef, HasHoldModifier, !IsImplicit, ForceDelete, /*FromDataEnd=*/true); void *TgtPtrBegin = TPR.TargetPointer; - if (!TPR.isPresent() && !TPR.isHostPointer() && - (DataSize || HasPresentModifier)) { - DP("Mapping does not exist (%s)\n", - (HasPresentModifier ? "'present' map type modifier" : "ignored")); - if (HasPresentModifier) { - // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13: - // "If a map clause appears on a target, target data, target enter data - // or target exit data construct with a present map-type-modifier then - // on entry to the region if the corresponding list item does not appear - // in the device data environment then an error occurs and the program - // terminates." - // - // This should be an error upon entering an "omp target exit data". It - // should not be an error upon exiting an "omp target data" or "omp - // target". For "omp target data", Clang thus doesn't include present - // modifiers for end calls. For "omp target", we have not found a valid - // OpenMP program for which the error matters: it appears that, if a - // program can guarantee that data is present at the beginning of an - // "omp target" region so that there's no error there, that data is also - // guaranteed to be present at the end. - MESSAGE("device mapping required by 'present' map type modifier does " - "not exist for host address " DPxMOD " (%" PRId64 " bytes)", - DPxPTR(HstPtrBegin), DataSize); - return OFFLOAD_FAIL; - } - } else { - DP("There are %" PRId64 " bytes allocated at target address " DPxMOD - " - is%s last\n", - DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")); - } - - // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16: - // "If the map clause appears on a target, target data, or target exit data - // construct and a corresponding list item of the original list item is not - // present in the device data environment on exit from the region then the - // list item is ignored." - if (!TPR.isPresent()) - continue; - // Move data back to the host - const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; - const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; - if (HasFrom && (HasAlways || TPR.Flags.IsLast) && - !TPR.Flags.IsHostPointer && DataSize != 0) { - DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", - DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - - // Wait for any previous transfer if an event is present. - if (void *Event = TPR.getEntry()->getEvent()) { - if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { - REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event)); + // Check if HstPtrBegin matches the State HstPtrBegin or if any HstPtrBegin + // values have been registered: + bool HostPointerMismatch = true; + if (TPR.getEntry()) + HostPointerMismatch = + TPR.getEntry()->HstPtrBegin != (uintptr_t)HstPtrBegin; + + if (!TPR.isHostPointer()) { + if (!TPR.isPresent() && (DataSize || HasPresentModifier)) { + DP("Mapping does not exist (%s)\n", + (HasPresentModifier ? "'present' map type modifier" : "ignored")); + if (HasPresentModifier) { + // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13: + // "If a map clause appears on a target, target data, target enter + // data or target exit data construct with a present map-type-modifier + // then on entry to the region if the corresponding list item does not + // appear in the device data environment then an error occurs and the + // program terminates." + // + // This should be an error upon entering an "omp target exit data". It + // should not be an error upon exiting an "omp target data" or "omp + // target". For "omp target data", Clang thus doesn't include present + // modifiers for end calls. For "omp target", we have not found a + // valid OpenMP program for which the error matters: it appears that, + // if a program can guarantee that data is present at the beginning of + // an "omp target" region so that there's no error there, that data is + // also guaranteed to be present at the end. + MESSAGE("device mapping required by 'present' map type modifier does " + "not exist for host address " DPxMOD " (%" PRId64 " bytes)", + DPxPTR(HstPtrBegin), DataSize); return OFFLOAD_FAIL; } + } else { + DP("There are %" PRId64 " bytes allocated at target address " DPxMOD + " - is%s last\n", + DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")); } - Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo, - TPR.getEntry()); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Copying data from device failed.\n"); - return OFFLOAD_FAIL; - } + // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16: + // "If the map clause appears on a target, target data, or target exit + // data construct and a corresponding list item of the original list item + // is not present in the device data environment on exit from the region + // then the list item is ignored." + if (!TPR.isPresent()) + continue; - // As we are expecting to delete the entry the d2h copy might race - // with another one that also tries to delete the entry. This happens - // as the entry can be reused and the reuse might happen after the - // copy-back was issued but before it completed. Since the reuse might - // also copy-back a value we would race. - if (TPR.Flags.IsLast) { - if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) != - OFFLOAD_SUCCESS) + // Move data back to the host + const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; + const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; + if (HasFrom && (HasAlways || TPR.Flags.IsLast) && DataSize != 0) { + DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + + // Wait for any previous transfer if an event is present. + if (void *Event = TPR.getEntry()->getEvent()) { + if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event)); + return OFFLOAD_FAIL; + } + } + + Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo, + TPR.getEntry()); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; + } + + // As we are expecting to delete the entry the d2h copy might race + // with another one that also tries to delete the entry. This happens + // as the entry can be reused and the reuse might happen after the + // copy-back was issued but before it completed. Since the reuse might + // also copy-back a value we would race. + if (TPR.Flags.IsLast) { + if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) != + OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } } + } else { + // Some zero-sized arrays are not mapped or added to the mapping table so + // they do not need to be removed. These arrays are not part of the + // current entry. + if (DataSize == 0 && !TPR.isPresent() && HostPointerMismatch) + continue; } // Add pointer to the buffer for post-synchronize processing. diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_arrays.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_arrays.cpp new file mode 100644 index 0000000000000..1c0257f9f246a --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_arrays.cpp @@ -0,0 +1,48 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// REQUIRES: amdgcn-amd-amdhsa +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +#include + +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int *v = (int *)malloc(sizeof(int) * 100); + +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 1 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[V_HST_PTR_ADDR:0x.*]], HstPtrBegin=[[V_HST_PTR_ADDR]], TgtAllocBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=200, DynRefCount=1, HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=200, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode + +// CHECK: Mapping exists with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=200, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=200 + +// CHECK: Entering OpenMP kernel at {{.*}} with 1 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[V_HST_PTR_ADDR]], HstPtrBegin=[[V_HST_PTR_ADDR]], TgtAllocBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=280, DynRefCount=1, HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=280, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// CHECK: Mapping exists with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=280, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=280 +// clang-format on +#pragma omp target map(tofrom : v[ : 50]) + { v[32] = 32; } + +#pragma omp target map(tofrom : v[ : 70]) + { v[64] = 64; } + + printf("v[32] = %d, v[64] = %d\n", v[32], v[64]); + + free(v); + + std::cout << "PASS\n"; + return 0; +} +// CHECK: v[32] = 32, v[64] = 64 +// CHECK: PASS diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_enter_exit.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_enter_exit.cpp new file mode 100644 index 0000000000000..8d3dd1f72200a --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_enter_exit.cpp @@ -0,0 +1,222 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + void *host_alloc = nullptr, *device_alloc = nullptr; + int *a = (int *)malloc(N * sizeof(int)); + int dev = omp_get_default_device(); + + // Init + for (int i = 0; i < N; ++i) { + a[i] = 10; + } + host_alloc = &a[0]; + + // + // map + target no close + // + +// clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 2 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[A_HST_PTR:0x.*]], HstPtrBegin=[[A_HST_PTR]], TgtAllocBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_ALLOC_HST_PTR:0x.*]], HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtAllocBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] +// CHECK: [[DEVICE_ALLOC_HST_PTR]] +// clang-format on +#pragma omp target data map(tofrom : a[ : N]) map(tofrom : device_alloc) + { +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 2 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=2 (incremented), HoldRefCount=0 +// CHECK: Mapping exists (implicit) with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=0, DynRefCount=2 (incremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=2 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=0, DynRefCount=2 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// clang-format on +#pragma omp target map(tofrom : device_alloc) + { device_alloc = &a[0]; } + } + // clang-format off +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=0, DynRefCount=1 (decremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1 (decremented), HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] +// CHECK: [[DEVICE_ALLOC_HST_PTR]] +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 2 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8 +// CHECK: Removing map entry with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096 + // clang-format on + if (device_alloc == host_alloc) + printf("a used from unified memory.\n"); + + // + // map + target with close + // + // clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 2 arguments: +// CHECK: Creating new map entry with HstPtrBase=[[A_HST_PTR]], HstPtrBegin=[[A_HST_PTR]], TgtAllocBegin=[[A_DEV_PTR:0x.*]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[A_HST_PTR]], TgtPtr=[[A_DEV_PTR]], Size=4096 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_ALLOC_HST_PTR]], HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtAllocBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] +// CHECK: [[DEVICE_ALLOC_HST_PTR]] + // clang-format on + device_alloc = 0; +#pragma omp target data map(close, tofrom : a[ : N]) map(tofrom : device_alloc) + { +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 2 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=2 (incremented), HoldRefCount=0 +// CHECK: Mapping exists (implicit) with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=2 (incremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=2 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=2 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// clang-format on +#pragma omp target map(tofrom : device_alloc) + { device_alloc = &a[0]; } + } + // clang-format off +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=1 (decremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1 (decremented), HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] +// CHECK: [[DEVICE_ALLOC_HST_PTR]] +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 2 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Copying data from device to host, TgtPtr=[[A_DEV_PTR]], HstPtr=[[A_HST_PTR]], Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8 +// CHECK: Removing map entry with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096 + // clang-format on + if (device_alloc != host_alloc) + printf("a copied to device.\n"); + + // + // map + use_device_ptr no close + // + // clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[A_HST_PTR]], HstPtrBegin=[[A_HST_PTR]], TgtAllocBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] + // clang-format on + device_alloc = 0; +#pragma omp target data map(tofrom : a[ : N]) use_device_ptr(a) + { device_alloc = &a[0]; } + // clang-format off +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 1 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_HST_PTR]], Size=4096 + // clang-format on + if (device_alloc == host_alloc) + printf("a used from unified memory with use_device_ptr.\n"); + + // + // map + use_device_ptr close + // + // clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry with HstPtrBase=[[A_HST_PTR]], HstPtrBegin=[[A_HST_PTR]], TgtAllocBegin=[[A_DEV_PTR:0x.*]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[A_HST_PTR]], TgtPtr=[[A_DEV_PTR]], Size=4096 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] + // clang-format on + device_alloc = 0; +#pragma omp target data map(close, tofrom : a[ : N]) use_device_ptr(a) + { device_alloc = &a[0]; } + // clang-format off +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 1 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Copying data from device to host, TgtPtr=[[A_DEV_PTR]], HstPtr=[[A_HST_PTR]], Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096 + // clang-format on + if (device_alloc != host_alloc) + printf("a used from device memory with use_device_ptr.\n"); + + // + // map enter/exit + close + // + // clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry with HstPtrBase=[[A_HST_PTR]], HstPtrBegin=[[A_HST_PTR]], TgtAllocBegin=[[A_DEV_PTR:0x.*]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[A_HST_PTR]], TgtPtr=[[A_DEV_PTR]], Size=4096 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] + // clang-format on + device_alloc = 0; +#pragma omp target enter data map(close, to : a[ : N]) +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 2 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_ALLOC_HST_PTR]], HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtAllocBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: Mapping exists (implicit) with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=2 (incremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=2 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// clang-format on +#pragma omp target map(from : device_alloc) + { + device_alloc = &a[0]; + a[0] = 99; + } + // clang-format off +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=0, DynRefCount=1 (decremented), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[A_HST_PTR]] +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 1 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Copying data from device to host, TgtPtr=[[A_DEV_PTR]], HstPtr=[[A_HST_PTR]], Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[A_HST_PTR]], TgtPtrBegin=[[A_DEV_PTR]], Size=4096 + // clang-format on + + // 'close' is missing, so the runtime must check whether s is actually in + // shared memory in order to determine whether to transfer data and delete the + // allocation. +#pragma omp target exit data map(from : a[ : N]) + + if (device_alloc != host_alloc) + printf("a has been mapped to the device.\n"); + + printf("a[0]=%d\n", a[0]); + printf("a is present: %d\n", omp_target_is_present(a, dev)); + + free(a); + + // CHECK: a used from unified memory. + // CHECK: a copied to device. + // CHECK: a used from unified memory with use_device_ptr. + + // CHECK: a used from device memory with use_device_ptr. + // CHECK: a has been mapped to the device. + // CHECK: a[0]=99 + // CHECK: a is present: 0 + + // CHECK: Done! + printf("Done!\n"); + + return 0; +} diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_modifier.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_modifier.cpp new file mode 100644 index 0000000000000..6b4d315b2edd9 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_close_modifier.cpp @@ -0,0 +1,184 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + int fails; + void *host_alloc = nullptr, *device_alloc = nullptr; + void *host_data = nullptr, *device_data = nullptr; + int *alloc = (int *)malloc(N * sizeof(int)); + int data[N]; + + for (int i = 0; i < N; ++i) { + alloc[i] = 10; + data[i] = 1; + } + + host_data = &data[0]; + host_alloc = &alloc[0]; + + // + // Test that updates on the device are not visible to host + // when only a TO mapping is used. + // + + // clang-format off +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_DATA_HST_PTR:0x.*]], HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtAllocBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry with HstPtrBase=[[DATA_HST_PTR:0x.*]], HstPtrBegin=[[DATA_HST_PTR]], TgtAllocBegin=[[DATA_DEV_PTR:0x.*]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[DATA_HST_PTR]], TgtPtr=[[DATA_DEV_PTR]], Size=4096 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_ALLOC_HST_PTR:0x.*]], HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtAllocBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry with HstPtrBase=[[ALLOC_HST_PTR:0x.*]], HstPtrBegin=[[ALLOC_HST_PTR]], TgtAllocBegin=[[ALLOC_DEV_PTR:0x.*]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[ALLOC_HST_PTR]], TgtPtr=[[ALLOC_DEV_PTR]], Size=4096 + +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[ALLOC_HST_PTR]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode + +// CHECK: Mapping exists with HstPtrBegin=[[ALLOC_HST_PTR]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + +// CHECK: Removing map entry with HstPtrBegin=[[ALLOC_HST_PTR]]{{.*}} Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]]{{.*}} Size=8 +// CHECK: Removing map entry with HstPtrBegin=[[DATA_HST_PTR]]{{.*}} Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_DATA_HST_PTR]]{{.*}} Size=8 + // clang-format on + +#pragma omp target map(tofrom : device_data, device_alloc) \ + map(close, to : alloc[ : N], data[ : N]) + { + device_data = &data[0]; + device_alloc = &alloc[0]; + + for (int i = 0; i < N; i++) { + alloc[i] += 1; + data[i] += 1; + } + } + + if (device_alloc != host_alloc) + printf("Address of alloc on device different from host address.\n"); + + if (device_data != host_data) + printf("Address of data on device different from host address.\n"); + + // On the host, check that the arrays have been updated. + fails = 0; + for (int i = 0; i < N; i++) { + if (alloc[i] != 10) + fails++; + } + printf("Alloc host values not updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + fails = 0; + for (int i = 0; i < N; i++) { + if (data[i] != 1) + fails++; + } + printf("Data host values not updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + // + // Test that updates on the device are visible on host + // when a from is used. + // + + for (int i = 0; i < N; i++) { + alloc[i] += 1; + data[i] += 1; + } + + // clang-format off + // CHECK: Creating new map entry with HstPtrBase=[[ALLOC_HST_PTR:0x.*]], HstPtrBegin=[[ALLOC_HST_PTR]], TgtAllocBegin=[[ALLOC_DEV_PTR:0x.*]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 + // CHECK: Copying data from host to device, HstPtr=[[ALLOC_HST_PTR]], TgtPtr=[[ALLOC_DEV_PTR]], Size=4096 + + // CHECK: Creating new map entry with HstPtrBase=[[DATA_HST_PTR:0x.*]], HstPtrBegin=[[DATA_HST_PTR]], TgtAllocBegin=[[DATA_DEV_PTR:0x.*]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 + // CHECK: Copying data from host to device, HstPtr=[[DATA_HST_PTR]], TgtPtr=[[DATA_DEV_PTR]], Size=4096 + + // CHECK: Mapping exists with HstPtrBegin=[[ALLOC_HST_PTR]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 + // CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 + + // CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode + + // CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + // CHECK: Mapping exists with HstPtrBegin=[[ALLOC_HST_PTR]], TgtPtrBegin=[[ALLOC_DEV_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + + // CHECK: Removing map entry with HstPtrBegin=[[DATA_HST_PTR]]{{.*}} Size=4096 + // CHECK: Removing map entry with HstPtrBegin=[[ALLOC_HST_PTR]]{{.*}} Size=4096 + // clang-format on + + int alloc_fails = 0; + int data_fails = 0; +#pragma omp target map(close, tofrom : alloc[ : N], data[ : N]) \ + map(tofrom : alloc_fails, data_fails) + { + for (int i = 0; i < N; i++) { + if (alloc[i] != 11) + alloc_fails++; + } + for (int i = 0; i < N; i++) { + if (data[i] != 2) + data_fails++; + } + + // Update values on the device + for (int i = 0; i < N; i++) { + alloc[i] += 1; + data[i] += 1; + } + } + + printf("Alloc device values are correct: %s\n", + (alloc_fails == 0) ? "Succeeded" : "Failed"); + printf("Data device values are correct: %s\n", + (data_fails == 0) ? "Succeeded" : "Failed"); + + fails = 0; + for (int i = 0; i < N; i++) { + if (alloc[i] != 12) + fails++; + } + printf("Alloc host values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + fails = 0; + for (int i = 0; i < N; i++) { + if (data[i] != 3) + fails++; + } + printf("Data host values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + free(alloc); + + // CHECK: Address of alloc on device different from host address. + // CHECK: Address of data on device different from host address. + // On the host, check that the arrays have been updated. + // CHECK: Alloc host values not updated: Succeeded + // CHECK: Data host values not updated: Succeeded + + // CHECK: Alloc device values are correct: Succeeded + // CHECK: Data device values are correct: Succeeded + // CHECK: Alloc host values updated: Succeeded + // CHECK: Data host values updated: Succeeded + + // CHECK: Done! + printf("Done!\n"); + + return 0; +} diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_error.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_error.cpp new file mode 100644 index 0000000000000..80ccc773031c7 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_error.cpp @@ -0,0 +1,36 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-fail-generic 2>&1 | %fcheck-generic +// clang-format on + +// REQUIRES: amdgcn-amd-amdhsa +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +#include + +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int *v = (int *)malloc(sizeof(int) * 100); + +// clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[V_HST_PTR_ADDR:0x.*]], HstPtrBegin=[[V_HST_PTR_ADDR]], TgtAllocBegin=[[V_HST_PTR_ADDR]], TgtPtrBegin=[[V_HST_PTR_ADDR]], Size=200, DynRefCount=1, HoldRefCount=0 +// CHECK: explicit extension not allowed: host address specified is [[V_HST_PTR_ADDR]] (280 bytes), but device allocation maps to host at [[V_HST_PTR_ADDR]] (200 bytes) +// CHECK: Call to getTargetPointer returned null pointer (device failure or illegal mapping). +// clang-format on +#pragma omp target enter data map(to : v[ : 50]) + +#pragma omp target enter data map(to : v[ : 70]) + +#pragma omp target + {} + + free(v); + + std::cout << "PASS\n"; + return 0; +} +// CHECK-NOT: PASS diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_no_target.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_no_target.cpp new file mode 100644 index 0000000000000..70b2f1d6ae64b --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_no_target.cpp @@ -0,0 +1,36 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// REQUIRES: amdgcn-amd-amdhsa +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +#include + +#include +#include + +#pragma omp requires unified_shared_memory + +/// In the current implementation the lack of a target region in the code +/// means that unified shared memory is not being enabled even if the pragma +/// is used explicitly. The code below showcases the copying of data to the +/// GPU. + +int main(int argc, char *argv[]) { + int *v = (int *)malloc(sizeof(int) * 100); + + // clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry with HstPtrBase=[[V_HST_PTR_ADDR:0x.*]], HstPtrBegin=[[V_HST_PTR_ADDR]], TgtAllocBegin=[[V_DEV_PTR_ADDR:0x.*]], TgtPtrBegin=[[V_DEV_PTR_ADDR]], Size=200, DynRefCount=1, HoldRefCount=0 +// CHECK: Copying data from host to device, HstPtr=[[V_HST_PTR_ADDR]], TgtPtr=[[V_DEV_PTR_ADDR]], Size=200 + // clang-format on + +#pragma omp target enter data map(to : v[ : 50]) + + free(v); + + std::cout << "PASS\n"; + return 0; +} +// CHECK: PASS diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_scalars.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_scalars.cpp new file mode 100644 index 0000000000000..68689d6fd1138 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_scalars.cpp @@ -0,0 +1,94 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// REQUIRES: amdgcn-amd-amdhsa +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +#include + +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int x = 5; + int y = 7; + int z = 11; + int *v = (int *)malloc(sizeof(int) * 10); +// clang-format off +// CHECK: Entering OpenMP data region with being_mapper at {{.*}} with 1 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[Z_HST_PTR_BEGIN:0x.*]], HstPtrBegin=[[Z_HST_PTR_BEGIN]], TgtAllocBegin=[[Z_HST_PTR_BEGIN]], TgtPtrBegin=[[Z_HST_PTR_BEGIN]], Size=4, DynRefCount=1, HoldRefCount=0 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[Z_HST_PTR_BEGIN]]{{.*}}[[Z_HST_PTR_BEGIN]] +// clang-format on +#pragma omp target enter data map(to : z) +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 4 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[X_HST_PTR_BEGIN:0x.*]], HstPtrBegin=[[X_HST_PTR_BEGIN]], TgtAllocBegin=[[X_HST_PTR_BEGIN]], TgtPtrBegin=[[X_HST_PTR_BEGIN]], Size=4, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[Y_HST_PTR_BEGIN:0x.*]], HstPtrBegin=[[Y_HST_PTR_BEGIN]], TgtAllocBegin=[[Y_HST_PTR_BEGIN]], TgtPtrBegin=[[Y_HST_PTR_BEGIN]], Size=4, DynRefCount=1, HoldRefCount=0 +// CHECK: variable {{.*}} does not have a valid device counterpart +// CHECK: Mapping exists with HstPtrBegin=[[X_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[Y_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// clang-format on +#pragma omp target map(tofrom : x) map(always, tofrom : y) map(to : v[ : 0]) + { + x++; + y++; + z++; + } + + // clang-format off +// CHECK: Mapping exists with HstPtrBegin=[[Y_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[X_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[Y_HST_PTR_BEGIN]], TgtPtrBegin=[[Y_HST_PTR_BEGIN]], Size=4 +// CHECK: Removing map entry with HstPtrBegin=[[X_HST_PTR_BEGIN]], TgtPtrBegin=[[X_HST_PTR_BEGIN]], Size=4 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[Z_HST_PTR_BEGIN]]{{.*}}[[Z_HST_PTR_BEGIN]] + // clang-format on + printf("x = %d, y = %d, z = %d\n", x, y, z); + +// clang-format off +// CHECK: Entering OpenMP kernel at {{.*}} with 4 arguments: +// CHECK: Creating new map entry ONLY with HstPtrBase=[[X_HST_PTR_BEGIN:0x.*]], HstPtrBegin=[[X_HST_PTR_BEGIN]], TgtAllocBegin=[[X_HST_PTR_BEGIN]], TgtPtrBegin=[[X_HST_PTR_BEGIN]], Size=4, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[Y_HST_PTR_BEGIN:0x.*]], HstPtrBegin=[[Y_HST_PTR_BEGIN]], TgtAllocBegin=[[Y_HST_PTR_BEGIN]], TgtPtrBegin=[[Y_HST_PTR_BEGIN]], Size=4, DynRefCount=1, HoldRefCount=0 +// CHECK: variable {{.*}} does not have a valid device counterpart +// CHECK: Mapping exists with HstPtrBegin=[[X_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[Y_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode +// clang-format on +#pragma omp target map(tofrom : x) map(always, tofrom : y) map(to : v[ : 0]) + { + x++; + y++; + z++; + } +// clang-format off +// CHECK: Mapping exists with HstPtrBegin=[[Y_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[X_HST_PTR_BEGIN]]{{.*}} Size=4, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[Y_HST_PTR_BEGIN]], TgtPtrBegin=[[Y_HST_PTR_BEGIN]], Size=4 +// CHECK: Removing map entry with HstPtrBegin=[[X_HST_PTR_BEGIN]], TgtPtrBegin=[[X_HST_PTR_BEGIN]], Size=4 +// CHECK: OpenMP Host-Device pointer mappings after block +// CHECK: Host Ptr +// CHECK: [[Z_HST_PTR_BEGIN]]{{.*}}[[Z_HST_PTR_BEGIN]] +// clang-format on +#pragma omp target exit data map(from : z) + // clang-format off +// CHECK: Exiting OpenMP data region with end_mapper at {{.*}} with 1 arguments: +// CHECK: Mapping exists with HstPtrBegin=[[Z_HST_PTR_BEGIN]], TgtPtrBegin=[[Z_HST_PTR_BEGIN]], Size=4, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Removing map entry with HstPtrBegin=[[Z_HST_PTR_BEGIN]], TgtPtrBegin=[[Z_HST_PTR_BEGIN]], Size=4 + // clang-format on + printf("x = %d, y = %d, z = %d\n", x, y, z); + + free(v); + + std::cout << "PASS\n"; + return 0; +} +// CHECK: x = 6, y = 8, z = 11 +// CHECK: x = 7, y = 9, z = 11 +// CHECK: PASS diff --git a/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_shared_update.cpp b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_shared_update.cpp new file mode 100644 index 0000000000000..e4794d55b7301 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/unified_map_checks_shared_update.cpp @@ -0,0 +1,137 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + int fails; + void *host_alloc = nullptr, *device_alloc = nullptr; + void *host_data = nullptr, *device_data = nullptr; + int *alloc = (int *)malloc(N * sizeof(int)); + int data[N]; + + for (int i = 0; i < N; ++i) { + alloc[i] = 10; + data[i] = 1; + } + + host_data = &data[0]; + host_alloc = &alloc[0]; + +// clang-format off +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_DATA_HST_PTR:0x.*]], HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtAllocBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DATA_HST_PTR:0x.*]], HstPtrBegin=[[DATA_HST_PTR]], TgtAllocBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DEVICE_ALLOC_HST_PTR:0x.*]], HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtAllocBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1, HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0 + +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode + +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]], TgtPtrBegin=[[DEVICE_ALLOC_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 +// CHECK: Mapping exists with HstPtrBegin=[[DEVICE_DATA_HST_PTR]], TgtPtrBegin=[[DEVICE_DATA_HST_PTR]], Size=8, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_ALLOC_HST_PTR]]{{.*}} Size=8 +// CHECK: Removing map entry with HstPtrBegin=[[DATA_HST_PTR]]{{.*}} Size=4096 +// CHECK: Removing map entry with HstPtrBegin=[[DEVICE_DATA_HST_PTR]]{{.*}} Size=8 +// clang-format on + +// implicit mapping of data +#pragma omp target map(tofrom : device_data, device_alloc) + { + device_data = &data[0]; + device_alloc = &alloc[0]; + + for (int i = 0; i < N; i++) { + alloc[i] += 1; + data[i] += 1; + } + } + + if (device_alloc == host_alloc) + printf("Address of alloc on device matches host address.\n"); + + if (device_data == host_data) + printf("Address of data on device matches host address.\n"); + + // On the host, check that the arrays have been updated. + fails = 0; + for (int i = 0; i < N; i++) { + if (alloc[i] != 11) + fails++; + } + printf("Alloc device values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + fails = 0; + for (int i = 0; i < N; i++) { + if (data[i] != 2) + fails++; + } + printf("Data device values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + // + // Test that updates on the host and on the device are both visible. + // + + // Update on the host. + for (int i = 0; i < N; ++i) { + alloc[i] += 1; + data[i] += 1; + } + + // clang-format off +// CHECK: Creating new map entry ONLY with HstPtrBase=[[DATA_HST_PTR]], HstPtrBegin=[[DATA_HST_PTR]], TgtAllocBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=1, HoldRefCount=0 + +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=1 (update suppressed), HoldRefCount=0 + +// CHECK: Launching kernel __omp_offloading_{{.*}}_main_l{{.*}} with 1 blocks and 256 threads in Generic mode + +// CHECK: Mapping exists with HstPtrBegin=[[DATA_HST_PTR]], TgtPtrBegin=[[DATA_HST_PTR]], Size=4096, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 + +// CHECK: Removing map entry with HstPtrBegin=[[DATA_HST_PTR]]{{.*}} Size=4096 + // clang-format on + + int alloc_fails = 0; + int data_fails = 0; +#pragma omp target + { + for (int i = 0; i < N; i++) { + if (alloc[i] != 12) + alloc_fails++; + } + for (int i = 0; i < N; i++) { + if (data[i] != 3) + data_fails++; + } + } + printf("Alloc host values updated: %s\n", + (alloc_fails == 0) ? "Succeeded" : "Failed"); + printf("Data host values updated: %s\n", + (data_fails == 0) ? "Succeeded" : "Failed"); + free(alloc); + + // CHECK: Address of alloc on device matches host address. + // CHECK: Address of data on device matches host address. + // CHECK: Alloc device values updated: Succeeded + // CHECK: Data device values updated: Succeeded + + // CHECK: Alloc host values updated: Succeeded + // CHECK: Data host values updated: Succeeded + + printf("Done!\n"); + + return 0; +} diff --git a/openmp/libomptarget/test/unified_shared_memory/zero_sized_array.cpp b/openmp/libomptarget/test/unified_shared_memory/zero_sized_array.cpp new file mode 100644 index 0000000000000..e24a653e61fe7 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/zero_sized_array.cpp @@ -0,0 +1,31 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env HSA_XNACK=1 LIBOMPTARGET_INFO=-1 %libomptarget-run-generic 2>&1 | %fcheck-generic +// clang-format on + +// REQUIRES: amdgcn-amd-amdhsa +// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 + +#include + +#include +#include + +#pragma omp requires unified_shared_memory + +int main(int argc, char *argv[]) { + int *v = (int *)malloc(sizeof(int) * 10); + + printf("host address of v = %p\n", v); + +// CHECK: variable {{.*}} does not have a valid device counterpart +#pragma omp target map(to : v[ : 0]) + { printf("device address of v = %p\n", v); } + + free(v); + + std::cout << "PASS\n"; + return 0; +} +// CHECK: host address of v = [[ADDR_OF_V:0x.*]] +// TODO: once printf is supported add check for ADDR_OF_V on device +// CHECK: PASS