Skip to content

Commit

Permalink
[OpenMP] Remove shadow pointer map and introduce consistent locking
Browse files Browse the repository at this point in the history
The shadow pointer map was problematic as we scanned an entire list if
an entry had shadow pointers. The new scheme stores the shadow pointers
inside the entries. This allows easy access without any search. It also
helps us, but also makes it necessary, to define a consistent locking
scheme. The implicit locking of entries via TargetPointerResultTy makes
this pretty effortless, however one has to:

- Lock HDTTMap before locking an entry.
- Do not lock HDTTMap while holding an entry lock.
- Hold the entry lock to read or modify an entry.

The changes to submitData and retrieveData have been made to ensure 2
when the LIBOMPTARGET_INFO flag is used. Most everything else happens by
itself as TargetPointerResultTy acts as a lock_guard for the entry. It
is a little complicated when we deal with multiple entries, especially
as they can be equal. However, one can still follow the rules with
reasonable effort.

LookupResult are now finally also locking the entry before it is
inspected. This is good even if we haven't run into a problem yet.

Differential Revision: https://reviews.llvm.org/D123446
  • Loading branch information
jdoerfert committed Mar 22, 2023
1 parent 0153ab6 commit f2c3859
Show file tree
Hide file tree
Showing 4 changed files with 435 additions and 398 deletions.
197 changes: 137 additions & 60 deletions openmp/libomptarget/include/device.h
Expand Up @@ -16,6 +16,7 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <list>
#include <map>
#include <memory>
Expand All @@ -26,6 +27,7 @@
#include "ExclusiveAccess.h"
#include "omptarget.h"
#include "rtl.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

// Forward declarations.
Expand All @@ -43,6 +45,22 @@ enum kmp_target_offload_kind {
};
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;

/// Information about shadow pointers.
struct ShadowPtrInfoTy {
void **HstPtrAddr = nullptr;
void *HstPtrVal = nullptr;
void **TgtPtrAddr = nullptr;
void *TgtPtrVal = nullptr;

bool operator==(const ShadowPtrInfoTy &Other) const {
return HstPtrAddr == Other.HstPtrAddr;
}
};

inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
return lhs.HstPtrAddr < rhs.HstPtrAddr;
}

/// Map between host data and target data.
struct HostDataToTargetTy {
const uintptr_t HstPtrBase; // host info.
Expand All @@ -60,8 +78,7 @@ struct HostDataToTargetTy {

struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)
: DynRefCount(DRC), HoldRefCount(HRC),
MayContainAttachedPointers(false) {}
: DynRefCount(DRC), HoldRefCount(HRC) {}
/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.
Expand All @@ -80,17 +97,10 @@ struct HostDataToTargetTy {
uint64_t DynRefCount;
uint64_t HoldRefCount;

/// Boolean flag to remember if any subpart of the mapped region might be
/// an attached pointer.
bool MayContainAttachedPointers;

/// This mutex will be locked when data movement is issued. For targets that
/// doesn't support async data movement, this mutex can guarantee that after
/// it is released, memory region on the target is update to date. For
/// targets that support async data movement, this can guarantee that data
/// movement has been issued. This mutex *must* be locked right before
/// releasing the mapping table lock.
std::mutex UpdateMtx;
/// A map of shadow pointers associated with this entry, the keys are host
/// pointer addresses to identify stale entries.
llvm::SmallSet<ShadowPtrInfoTy, 2> ShadowPtrInfos;

/// Pointer to the event corresponding to the data update of this map.
/// Note: At present this event is created when the first data transfer from
/// host to device is issued, and only being used for H2D. It is not used
Expand Down Expand Up @@ -222,16 +232,41 @@ struct HostDataToTargetTy {
return ThisRefCount == 1;
}

void setMayContainAttachedPointers() const {
States->MayContainAttachedPointers = true;
/// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
/// the target ptr value was not already present in the existing set of shadow
/// pointers. Return true if something was added.
bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const {
auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
if (Pair.second)
return true;
// Check for a stale entry, if found, replace the old one.
if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
return false;
States->ShadowPtrInfos.erase(ShadowPtrInfo);
return addShadowPointer(ShadowPtrInfo);
}
bool getMayContainAttachedPointers() const {
return States->MayContainAttachedPointers;

/// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
/// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
/// OFFLOAD_SUCCESS. The entry is locked for this operation.
template <typename CBTy> int foreachShadowPointerInfo(CBTy CB) const {
for (auto &It : States->ShadowPtrInfos)
if (CB(It) == OFFLOAD_FAIL)
return OFFLOAD_FAIL;
return OFFLOAD_SUCCESS;
}

void lock() const { States->UpdateMtx.lock(); }
/// Lock this entry for exclusive access. Ensure to get exclusive access to
/// HDTTMap first!
void lock() const { Mtx.lock(); }

/// Unlock this entry to allow other threads inspecting it.
void unlock() const { Mtx.unlock(); }

void unlock() const { States->UpdateMtx.unlock(); }
private:
// Mutex that needs to be held before the entry is inspected or modified. The
// HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
mutable std::mutex Mtx;
};

/// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
Expand All @@ -243,6 +278,7 @@ struct HostDataToTargetMapKeyTy {
uintptr_t KeyValue;

HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {}
HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {}
HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT)
: KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
HostDataToTargetTy *HDTT;
Expand All @@ -260,49 +296,89 @@ inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
return LHS.KeyValue < RHS.KeyValue;
}

struct LookupResult {
struct {
unsigned IsContained : 1;
unsigned ExtendsBefore : 1;
unsigned ExtendsAfter : 1;
} Flags;

/// The corresponding map table entry which is stable.
HostDataToTargetTy *Entry = nullptr;

LookupResult() : Flags({0, 0, 0}), Entry() {}
};

/// This struct will be returned by \p DeviceTy::getTargetPointer which provides
/// more data than just a target pointer.
/// more data than just a target pointer. A TargetPointerResultTy that has a non
/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
/// the entry is locked. To give up ownership without destroying the TPR use the
/// reset() function.
struct TargetPointerResultTy {
struct {
struct FlagTy {
/// If the map table entry is just created
unsigned IsNewEntry : 1;
/// If the pointer is actually a host pointer (when unified memory enabled)
unsigned IsHostPointer : 1;
/// If the pointer is present in the mapping table.
unsigned IsPresent : 1;
} Flags = {0, 0, 0};
/// Flag indicating that this was the last user of the entry and the ref
/// count is now 0.
unsigned IsLast : 1;
} Flags = {0, 0, 0, 0};

TargetPointerResultTy(const TargetPointerResultTy &) = delete;
TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete;
TargetPointerResultTy() {}

TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry,
void *TargetPointer)
: Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
if (Entry)
Entry->lock();
}

TargetPointerResultTy(TargetPointerResultTy &&TPR)
: Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
TPR.Entry = nullptr;
}

TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) {
if (&TPR != this) {
std::swap(Flags, TPR.Flags);
std::swap(Entry, TPR.Entry);
std::swap(TargetPointer, TPR.TargetPointer);
}
return *this;
}

~TargetPointerResultTy() {
if (Entry)
Entry->unlock();
}

bool isPresent() const { return Flags.IsPresent; }

bool isHostPointer() const { return Flags.IsHostPointer; }

/// The corresponding map table entry which is stable.
HostDataToTargetTy *Entry = nullptr;

/// The corresponding target pointer
void *TargetPointer = nullptr;

HostDataToTargetTy *getEntry() const { return Entry; }
void setEntry(HostDataToTargetTy *HDTTT,
HostDataToTargetTy *OwnedTPR = nullptr) {
if (Entry)
Entry->unlock();
Entry = HDTTT;
if (Entry && Entry != OwnedTPR)
Entry->lock();
}

void reset() { *this = TargetPointerResultTy(); }

private:
/// The corresponding map table entry which is stable.
HostDataToTargetTy *Entry = nullptr;
};

/// Map for shadow pointers
struct ShadowPtrValTy {
void *HstPtrVal;
void *TgtPtrAddr;
void *TgtPtrVal;
struct LookupResult {
struct {
unsigned IsContained : 1;
unsigned ExtendsBefore : 1;
unsigned ExtendsAfter : 1;
} Flags;

LookupResult() : Flags({0, 0, 0}), TPR() {}

TargetPointerResultTy TPR;
};
typedef std::map<void *, ShadowPtrValTy> ShadowPtrListTy;

///
struct PendingCtorDtorListsTy {
Expand Down Expand Up @@ -336,9 +412,7 @@ struct DeviceTy {

PendingCtorsDtorsPerLibrary PendingCtorsDtors;

ShadowPtrListTy ShadowPtrMap;

std::mutex PendingGlobalsMtx, ShadowMtx;
std::mutex PendingGlobalsMtx;

DeviceTy(RTLInfoTy *RTL);
// DeviceTy is not copyable
Expand All @@ -353,7 +427,8 @@ struct DeviceTy {
/// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
/// exclusive access to the HDTT map.
LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
int64_t Size);
int64_t Size,
HostDataToTargetTy *OwnedTPR = nullptr);

/// Get the target pointer based on host pointer begin and base. If the
/// mapping already exists, the target pointer will be returned directly. In
Expand All @@ -365,12 +440,13 @@ struct DeviceTy {
/// - Data allocation failed;
/// - The user tried to do an illegal mapping;
/// - Data transfer issue fails.
TargetPointerResultTy
getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
map_var_info_t HstPtrName, bool HasFlagTo,
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier,
bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
TargetPointerResultTy getTargetPointer(
HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
bool ReleaseHDTTMap = true);

/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
/// ensures exclusive access to the HDTT map.
Expand All @@ -388,10 +464,9 @@ struct DeviceTy {
/// - \p FromDataEnd tracks the number of threads referencing the entry at
/// targetDataEnd for delayed deletion purpose.
[[nodiscard]] TargetPointerResultTy
getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr,
bool MustContain = false, bool ForceDelete = false,
bool FromDataEnd = false);
getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
bool UseHoldRefCount, bool MustContain = false,
bool ForceDelete = false, bool FromDataEnd = false);

/// Remove the \p Entry from the data map. Expect the entry's total reference
/// count to be zero and the caller thread to be the last one using it. \p
Expand Down Expand Up @@ -436,10 +511,12 @@ struct DeviceTy {
// synchronous.
// Copy data from host to device
int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
AsyncInfoTy &AsyncInfo);
AsyncInfoTy &AsyncInfo,
HostDataToTargetTy *Entry = nullptr);
// Copy data from device back to host
int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
AsyncInfoTy &AsyncInfo);
AsyncInfoTy &AsyncInfo,
HostDataToTargetTy *Entry = nullptr);
// Copy data from current device to destination device directly
int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo);
Expand Down
18 changes: 6 additions & 12 deletions openmp/libomptarget/src/api.cpp
Expand Up @@ -116,16 +116,13 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
}

DeviceTy &Device = *PM->Devices[DeviceNum];
bool IsLast; // not used
bool IsHostPtr;
// omp_target_is_present tests whether a host pointer refers to storage that
// is mapped to a given device. However, due to the lack of the storage size,
// only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
// lengh array) is mapped instead of the referred storage.
TargetPointerResultTy TPR =
Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
/*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr);
TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
/*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false);
int Rc = TPR.isPresent();
DP("Call to omp_target_is_present returns %d\n", Rc);
return Rc;
Expand Down Expand Up @@ -360,13 +357,10 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
return nullptr;
}

bool IsLast = false;
bool IsHostPtr = false;
auto &Device = *PM->Devices[DeviceNum];
TargetPointerResultTy TPR =
Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
/*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr);
TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
/*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false);
if (!TPR.isPresent()) {
DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n",
DPxPTR(Ptr), DeviceNum);
Expand Down

0 comments on commit f2c3859

Please sign in to comment.