diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index 7bc4629c6220e..cd76d88618be4 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -68,7 +68,8 @@ struct HostDataToTargetTy { const uintptr_t HstPtrEnd; // non-inclusive. const map_var_info_t HstPtrName; // Optional source name of mapped variable. - const uintptr_t TgtPtrBegin; // target info. + const uintptr_t TgtAllocBegin; // allocated target memory + const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding private: static const uint64_t INFRefCount = ~(uint64_t)0; @@ -120,16 +121,18 @@ struct HostDataToTargetTy { const std::unique_ptr States; public: - HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, + HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, + uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin, bool UseHoldRefCount, map_var_info_t Name = nullptr, bool IsINF = false) : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name), - TgtPtrBegin(TB), States(std::make_unique(UseHoldRefCount ? 0 - : IsINF ? INFRefCount - : 1, - !UseHoldRefCount ? 0 - : IsINF ? INFRefCount - : 1)) {} + TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin), + States(std::make_unique(UseHoldRefCount ? 0 + : IsINF ? INFRefCount + : 1, + !UseHoldRefCount ? 0 + : IsINF ? INFRefCount + : 1)) {} /// Get the total reference count. This is smarter than just getDynRefCount() /// + getHoldRefCount() because it handles the case where at least one is @@ -446,8 +449,8 @@ struct DeviceTy { /// - Data transfer issue fails. TargetPointerResultTy getTargetPointer( HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, - int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, - bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, + bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, bool ReleaseHDTTMap = true); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 22ab7436f75b5..a5409e2b2b74e 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -90,6 +90,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { /*HstPtrBase=*/(uintptr_t)HstPtrBegin, /*HstPtrBegin=*/(uintptr_t)HstPtrBegin, /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size, + /*TgtAllocBegin=*/(uintptr_t)TgtPtrBegin, /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin, /*UseHoldRefCount=*/false, /*Name=*/nullptr, /*IsRefCountINF=*/true)) @@ -216,10 +217,10 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, TargetPointerResultTy DeviceTy::getTargetPointer( HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, - int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, - bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, - HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { + int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR); LR.TPR.Flags.IsPresent = true; @@ -297,24 +298,28 @@ TargetPointerResultTy DeviceTy::getTargetPointer( } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. LR.TPR.Flags.IsNewEntry = true; - uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin); + uintptr_t TgtAllocBegin = + (uintptr_t)allocData(TgtPadding + Size, HstPtrBegin); + uintptr_t TgtPtrBegin = TgtAllocBegin + TgtPadding; // Release the mapping table lock only after the entry is locked by // attaching it to TPR. LR.TPR.setEntry(HDTTMap ->emplace(new HostDataToTargetTy( (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, - (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, - HstPtrName)) + (uintptr_t)HstPtrBegin + Size, TgtAllocBegin, + TgtPtrBegin, HasHoldModifier, HstPtrName)) .first->HDTT); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Creating new map entry with HstPtrBase=" DPxMOD - ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, " - "DynRefCount=%s, HoldRefCount=%s, Name=%s\n", - DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, + ", HstPtrBegin=" DPxMOD ", TgtAllocBegin=" DPxMOD + ", TgtPtrBegin=" DPxMOD + ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n", + DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(TgtAllocBegin), + DPxPTR(TgtPtrBegin), Size, LR.TPR.getEntry()->dynRefCountToStr().c_str(), LR.TPR.getEntry()->holdRefCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - LR.TPR.TargetPointer = (void *)Ptr; + LR.TPR.TargetPointer = (void *)TgtPtrBegin; // Notify the plugin about the new mapping. if (notifyDataMapped(HstPtrBegin, Size)) @@ -490,8 +495,9 @@ int DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap, int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { assert(Entry && "Trying to deallocate a null entry."); - DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n", - DPxPTR(Entry->TgtPtrBegin), Size); + DP("Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation " + "starting at " DPxMOD "\n", + DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin)); void *Event = Entry->getEvent(); if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { @@ -499,7 +505,7 @@ int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { return OFFLOAD_FAIL; } - int Ret = deleteData((void *)Entry->TgtPtrBegin); + int Ret = deleteData((void *)Entry->TgtAllocBegin); // Notify the plugin about the unmapped memory. Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin); @@ -551,8 +557,8 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } -int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) { - return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind); +int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { + return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); } static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin, diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 04201e8d7a601..9426d8a0ca76f 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -208,6 +208,7 @@ static int initLibrary(DeviceTy &Device) { (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/, + (uintptr_t)CurrDeviceEntry->addr /*TgtAllocBegin*/, (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, false /*UseHoldRefCount*/, CurrHostEntry->name, true /*IsRefCountINF*/)); @@ -602,18 +603,16 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one // is a combined entry. - int64_t Padding = 0; + int64_t TgtPadding = 0; const int NextI = I + 1; if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum && getParentIndex(ArgTypes[NextI]) == I) { int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase); - Padding = (int64_t)HstPtrBegin % Alignment; - if (Padding) { + TgtPadding = (int64_t)HstPtrBegin % Alignment; + if (TgtPadding) { DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD "\n", - Padding, DPxPTR(HstPtrBegin)); - HstPtrBegin = (char *)HstPtrBegin - Padding; - DataSize += Padding; + TgtPadding, DPxPTR(HstPtrBegin)); } } @@ -653,7 +652,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. PointerTpr = Device.getTargetPointer( - HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *), + HDTTMap, HstPtrBase, HstPtrBase, /*TgtPadding=*/0, sizeof(void *), /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, @@ -683,8 +682,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; // Note that HDTTMap will be released in getTargetPointer. auto TPR = Device.getTargetPointer( - HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, - HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, + HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName, + HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry()); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; @@ -890,25 +889,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, } void *HstPtrBegin = Args[I]; - void *HstPtrBase = ArgBases[I]; int64_t DataSize = ArgSizes[I]; - // Adjust for proper alignment if this is a combined entry (for structs). - // Look at the next argument - if that is MEMBER_OF this one, then this one - // is a combined entry. - const int NextI = I + 1; - if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum && - getParentIndex(ArgTypes[NextI]) == I) { - int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase); - int64_t Padding = (int64_t)HstPtrBegin % Alignment; - if (Padding) { - DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD - "\n", - Padding, DPxPTR(HstPtrBegin)); - HstPtrBegin = (char *)HstPtrBegin - Padding; - DataSize += Padding; - } - } - bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && diff --git a/openmp/libomptarget/test/mapping/padding_not_mapped.c b/openmp/libomptarget/test/mapping/padding_not_mapped.c new file mode 100644 index 0000000000000..9d5ef212ffc24 --- /dev/null +++ b/openmp/libomptarget/test/mapping/padding_not_mapped.c @@ -0,0 +1,43 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=51 +// RUN: %libomptarget-run-fail-generic 2>&1 \ +// RUN: | %fcheck-generic + +// The host memory layout for the following program looks like this: +// +// | 4 bytes | 4 bytes | 8 bytes | +// | s.x | s.y | s.z | +// `-----------------------------' +// +// s is always at least 8-byte aligned in host memory due to s.z, so +// libomptarget's device padding for map(s.y,s.z) always maps to host memory +// that includes s.x. At one time, s.x appeared to be mapped as a result, but +// libomptarget has since been fixed not to consider device padding as mapped to +// host memory. + +#include +#include + +int main() { + struct S { int x; int y; double z; } s = {1, 2, 3}; + + // CHECK: &s.x = 0x[[#%x,HOST_ADDR:]], size = [[#%u,SIZE:]] + fprintf(stderr, "&s = %p\n", &s); + fprintf(stderr, "&s.x = %p, size = %ld\n", &s.x, sizeof s.x); + fprintf(stderr, "&s.y = %p\n", &s.y); + fprintf(stderr, "&s.z = %p\n", &s.z); + + // CHECK: s.x is present: 0 + // CHECK: s.x = 1{{$}} + #pragma omp target enter data map(alloc: s.y, s.z) + int dev = omp_get_default_device(); + fprintf(stderr, "s.x is present: %d\n", omp_target_is_present(&s.x, dev)); + #pragma omp target update from(s.x) // should have no effect + fprintf(stderr, "s.x = %d\n", s.x); + + // CHECK: Libomptarget message: device mapping required by 'present' map type modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] bytes) + // CHECK: Libomptarget error: Call to getTargetPointer returned null pointer ('present' map type modifier). + // CHECK: Libomptarget fatal error 1: failure of target construct while offloading is mandatory + #pragma omp target enter data map(present, alloc: s.x) + + return 0; +} diff --git a/openmp/libomptarget/test/mapping/power_of_two_alignment.c b/openmp/libomptarget/test/mapping/power_of_two_alignment.c index 06b0e457983fb..faebe4f89fd9d 100644 --- a/openmp/libomptarget/test/mapping/power_of_two_alignment.c +++ b/openmp/libomptarget/test/mapping/power_of_two_alignment.c @@ -35,7 +35,9 @@ // padding for s, libomptarget reported an array extension error. collidePost // is never fully contained within that padding (which would avoid the extension // error) because collidePost is 16 bytes while the padding is always less than -// 16 bytes due to the modulo operations. +// 16 bytes due to the modulo operations. (Later, libomptarget was changed not +// to consider padding to be mapped to the host, so it cannot be involved in +// array extension errors.) #include #include