@@ -13,16 +13,105 @@
#define OMPTARGET_STATE_H
#include " Debug.h"
#include " Mapping.h"
#include " Types.h"
#include " Utils.h"
#pragma omp begin declare target device_type(nohost)
namespace _OMP {
namespace memory {
/// Allocate \p Size bytes in shared memory, if possible, for \p Reason.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);
/// Free \p Ptr, allocated via allocShared, for \p Reason.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
/// Allocate \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);
/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();
/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);
} // namespace memory
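// Illustrative usage sketch (not part of this header): a matched
// allocGlobal/freeGlobal pair. The \p Reason strings are descriptive only and
// surface in debugging output.
//
//   void *Buffer = memory::allocGlobal(256, "example scratch buffer");
//   ASSERT(Buffer != nullptr && "Nullptr returned by malloc!");
//   // ... use Buffer ...
//   memory::freeGlobal(Buffer, "example scratch buffer");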
namespace state {
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
struct ICVStateTy {
uint32_t NThreadsVar;
uint32_t LevelVar;
uint32_t ActiveLevelVar;
uint32_t MaxActiveLevelsVar;
uint32_t RunSchedVar;
uint32_t RunSchedChunkVar;
bool operator==(const ICVStateTy &Other) const;
void assertEqual(const ICVStateTy &Other) const;
};
struct TeamStateTy {
void init(bool IsSPMD);
bool operator==(const TeamStateTy &) const;
void assertEqual(TeamStateTy &Other) const;
/// ICVs
///
/// Preallocated storage for ICV values that are used if the threads have not
/// set a custom default. The latter is supported but unlikely and slow(er).
///
///{
ICVStateTy ICVState;
///}
uint32_t ParallelTeamSize;
uint32_t HasThreadState;
ParallelRegionFnTy ParallelRegionFnVar;
};
extern TeamStateTy TeamState;
#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
struct ThreadStateTy {
/// ICVs have preallocated storage in the TeamStateTy which is used if a
/// thread has not set a custom value. The latter is supported but unlikely.
/// When it happens we will allocate dynamic memory to hold the values of all
/// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
/// ICV struct to hold them all. This is slower than alternatives but allows
/// users to pay only for what they use.
///
state::ICVStateTy ICVState;
ThreadStateTy *PreviousThreadState;
void init() {
ICVState = TeamState.ICVState;
PreviousThreadState = nullptr;
}
void init(ThreadStateTy *PreviousTS) {
ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
PreviousThreadState = PreviousTS;
}
};
extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
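// Illustrative sketch (an assumption, not code from this header): pushing and
// popping a thread state so ICV values are restored when a nested data
// environment ends.
//
//   uint32_t TId = mapping::getThreadIdInBlock();
//   auto *TS = reinterpret_cast<ThreadStateTy *>(
//       memory::allocGlobal(sizeof(ThreadStateTy), "new data environment"));
//   TS->init(ThreadStates[TId]); // copy current ICVs, remember predecessor
//   ThreadStates[TId] = TS;
//   // ... region body runs with thread-private ICVs ...
//   ThreadStates[TId] = TS->PreviousThreadState; // pop on exit
//   memory::freeGlobal(TS, "new data environment");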
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD);
@@ -37,6 +126,7 @@ enum ValueKind {
VK_RunSchedChunk,
VK_ParallelRegionFn,
VK_ParallelTeamSize,
VK_HasThreadState,
};
/// TODO
@@ -54,14 +144,90 @@ struct DateEnvironmentRAII {
/// TODO
void resetStateForThread(uint32_t TId);
inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
IdentTy *Ident, bool ForceTeamState) {
if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() ||
!TeamState.HasThreadState))
return TeamState.ICVState.*Var;
uint32_t TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(!ThreadStates[TId])) {
ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
sizeof(ThreadStateTy), "ICV modification outside data environment"));
ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!");
TeamState.HasThreadState = true;
ThreadStates[TId]->init();
}
return ThreadStates[TId]->ICVState.*Var;
}
inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
bool ForceTeamState) {
auto TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() &&
TeamState.HasThreadState && ThreadStates[TId]))
return ThreadStates[TId]->ICVState.*Var;
return TeamState.ICVState.*Var;
}
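// Note: ICV reads go through lookupImpl and never allocate, while ICV writes
// go through lookupForModify32Impl, which may lazily allocate a ThreadStateTy
// the first time a thread modifies an ICV outside a data environment.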
__attribute__((always_inline, flatten)) inline uint32_t &
lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
switch (Kind) {
case state::VK_NThreads:
if (IsReadonly)
return lookupImpl(&ICVStateTy::NThreadsVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident,
ForceTeamState);
case state::VK_Level:
if (IsReadonly)
return lookupImpl(&ICVStateTy::LevelVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident, ForceTeamState);
case state::VK_ActiveLevel:
if (IsReadonly)
return lookupImpl(&ICVStateTy::ActiveLevelVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident,
ForceTeamState);
case state::VK_MaxActiveLevels:
if (IsReadonly)
return lookupImpl(&ICVStateTy::MaxActiveLevelsVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident,
ForceTeamState);
case state::VK_RunSched:
if (IsReadonly)
return lookupImpl(&ICVStateTy::RunSchedVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident,
ForceTeamState);
case state::VK_RunSchedChunk:
if (IsReadonly)
return lookupImpl(&ICVStateTy::RunSchedChunkVar, ForceTeamState);
return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident,
ForceTeamState);
case state::VK_ParallelTeamSize:
return TeamState.ParallelTeamSize;
case state::VK_HasThreadState:
return TeamState.HasThreadState;
default:
break;
}
__builtin_unreachable();
}
__attribute__((always_inline, flatten)) inline void *&
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
switch (Kind) {
case state::VK_ParallelRegionFn:
return TeamState.ParallelRegionFnVar;
default:
break;
}
__builtin_unreachable();
}
/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct Value {
__attribute__((flatten, always_inline)) operator Ty() {
return lookup(/*IsReadonly*/ true, /*IdentTy*/ nullptr,
/*ForceTeamState*/ false);
}
__attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
@@ -79,21 +245,29 @@ template <typename Ty, ValueKind Kind> struct Value {
return *this;
}
__attribute__((flatten, always_inline)) void
assert_eq(const Ty &V, IdentTy *Ident = nullptr,
bool ForceTeamState = false) {
ASSERT(lookup(/*IsReadonly*/ true, Ident, ForceTeamState) == V);
}
private:
__attribute__((flatten, always_inline)) Ty &
lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
return t;
}
__attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
IdentTy *Ident) {
return (lookup(/*IsReadonly*/ false, Ident, /*ForceTeamState*/ false) +=
UpdateVal);
}
__attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
IdentTy *Ident) {
return (lookup(/*IsReadonly*/ false, Ident, /*ForceTeamState*/ false) =
UpdateVal);
}
template <typename VTy, typename Ty2> friend struct ValueRAII;
@@ -104,7 +278,8 @@ template <typename Ty, ValueKind Kind> struct Value {
/// we can declare in global scope.
template <typename Ty, ValueKind Kind> struct PtrValue {
__attribute__((flatten, always_inline)) operator Ty() {
return lookup(/*IsReadonly*/ true, /*IdentTy*/ nullptr,
/*ForceTeamState*/ false);
}
__attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
@@ -113,18 +288,23 @@ template <typename Ty, ValueKind Kind> struct PtrValue {
}
private:
Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
return lookupPtr(Kind, IsReadonly, ForceTeamState);
}
Ty &set(Ty UpdateVal) {
return (lookup(/*IsReadonly*/ false, /*IdentTy*/ nullptr,
/*ForceTeamState*/ false) = UpdateVal);
}
template <typename VTy, typename Ty2> friend struct ValueRAII;
};
template <typename VTy, typename Ty> struct ValueRAII {
ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
bool ForceTeamState = false)
: Ptr(Active ? &V.lookup(/*IsReadonly*/ false, Ident, ForceTeamState)
: (Ty *)utils::UndefPtr),
Val(OldValue), Active(Active) {
if (!Active)
return;
@@ -149,6 +329,9 @@ inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;
/// TODO
inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;
/// TODO
inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;
/// TODO
inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
ParallelRegionFn;
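// Usage sketch (hypothetical): the Value/PtrValue wrappers above make state
// and ICV accesses look like plain variable accesses.
//
//   uint32_t Size = state::ParallelTeamSize; // read via operator Ty()
//   state::ParallelTeamSize = 32;            // write via operator=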
@@ -181,29 +364,6 @@ inline state::Value<uint32_t, state::VK_RunSched> RunSched;
} // namespace icv
} // namespace _OMP
#pragma omp end declare target