[Libomptarget] Address existing warnings in the device runtime library
This patch attempts to address the current warnings in the OpenMP
offloading device runtime. Previously we did not see these because we
compiled the runtime without the standard warning flags enabled.
However, these warnings are now emitted when we build the static
library version of this runtime. This becomes extremely noisy when
coupled with the fact that we compile each file roughly 32 times when
all the architectures are considered, so it would be ideal not to have
these warnings show up when building.

Most of these warnings were simply implicit switch-case fallthroughs,
which can be addressed using C++17's [[fallthrough]] attribute.
Additionally, there was a volatile qualifier that was being cast away.
Removing it is most likely safe because we cast it away before the
variable is even used, and the removal did not seem to affect anything
in testing.
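
As a hypothetical illustration (not part of this patch), the fallthrough
pattern looks like the sketch below; copyArgs and its parameters are
made-up names, and the comments assume a warning such as
-Wimplicit-fallthrough is among the standard flags:

#include <cstdint>

// Copies the first N arguments; each case deliberately falls through so
// that all lower-indexed arguments are copied as well.
void copyArgs(uint32_t *Dst, const uint32_t *Src, uint32_t N) {
  switch (N) {
  case 2:
    Dst[1] = Src[1];
    [[fallthrough]]; // C++17 attribute; silences the implicit-fallthrough warning
  case 1:
    Dst[0] = Src[0];
    [[fallthrough]];
  case 0:
    break;
  }
}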

Depends on D125260

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D125339
jhuber6 committed May 13, 2022
1 parent b4f8443 commit ce0caf4
Showing 6 changed files with 39 additions and 46 deletions.
4 changes: 2 additions & 2 deletions openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -38,7 +38,7 @@ int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
int32_t vprintf(const char *, void *);
namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return vprintf(Format, Arguments);
}
} // namespace impl
@@ -47,7 +47,7 @@ static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
#pragma omp begin declare variant match(device = {arch(amdgcn)})
namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return -1;
}
} // namespace impl
6 changes: 2 additions & 4 deletions openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -46,7 +46,7 @@ uint32_t getNumberOfWarpsInBlock();
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})

-static const llvm::omp::GV &getGridValue() {
+const llvm::omp::GV &getGridValue() {
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
}

@@ -121,9 +121,7 @@ uint32_t getNumHardwareThreadsInBlock() {
return __nvvm_read_ptx_sreg_ntid_x();
}

-static const llvm::omp::GV &getGridValue() {
-return llvm::omp::NVPTXGridValues;
-}
+const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }

LaneMaskTy activemask() {
unsigned int Mask;
32 changes: 16 additions & 16 deletions openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -158,52 +158,52 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
break;
case 16:
GlobalArgs[15] = args[15];
-// FALLTHROUGH
+[[fallthrough]];
case 15:
GlobalArgs[14] = args[14];
-// FALLTHROUGH
+[[fallthrough]];
case 14:
GlobalArgs[13] = args[13];
-// FALLTHROUGH
+[[fallthrough]];
case 13:
GlobalArgs[12] = args[12];
-// FALLTHROUGH
+[[fallthrough]];
case 12:
GlobalArgs[11] = args[11];
-// FALLTHROUGH
+[[fallthrough]];
case 11:
GlobalArgs[10] = args[10];
-// FALLTHROUGH
+[[fallthrough]];
case 10:
GlobalArgs[9] = args[9];
-// FALLTHROUGH
+[[fallthrough]];
case 9:
GlobalArgs[8] = args[8];
-// FALLTHROUGH
+[[fallthrough]];
case 8:
GlobalArgs[7] = args[7];
-// FALLTHROUGH
+[[fallthrough]];
case 7:
GlobalArgs[6] = args[6];
-// FALLTHROUGH
+[[fallthrough]];
case 6:
GlobalArgs[5] = args[5];
-// FALLTHROUGH
+[[fallthrough]];
case 5:
GlobalArgs[4] = args[4];
-// FALLTHROUGH
+[[fallthrough]];
case 4:
GlobalArgs[3] = args[3];
-// FALLTHROUGH
+[[fallthrough]];
case 3:
GlobalArgs[2] = args[2];
-// FALLTHROUGH
+[[fallthrough]];
case 2:
GlobalArgs[1] = args[1];
-// FALLTHROUGH
+[[fallthrough]];
case 1:
GlobalArgs[0] = args[0];
-// FALLTHROUGH
+[[fallthrough]];
case 0:
break;
}
12 changes: 5 additions & 7 deletions openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -167,8 +167,8 @@ uint32_t roundToWarpsize(uint32_t s) {

uint32_t kmpcMin(uint32_t x, uint32_t y) { return x < y ? x : y; }

-static volatile uint32_t IterCnt = 0;
-static volatile uint32_t Cnt = 0;
+static uint32_t IterCnt = 0;
+static uint32_t Cnt = 0;

} // namespace

@@ -211,7 +211,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
// to the number of slots in the buffer.
bool IsMaster = (ThreadId == 0);
while (IsMaster) {
-Bound = atomic::load((uint32_t *)&IterCnt, __ATOMIC_SEQ_CST);
+Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
if (TeamId < Bound + num_of_records)
break;
}
@@ -228,8 +228,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
-ChunkTeamCount =
-atomic::inc((uint32_t *)&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
+ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
}
// Synchronize
if (mapping::isSPMDMode())
@@ -305,8 +304,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
-atomic::add((uint32_t *)&IterCnt, uint32_t(num_of_records),
-__ATOMIC_SEQ_CST);
+atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
}

return 0;
22 changes: 8 additions & 14 deletions openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -298,14 +298,8 @@ uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
return ThreadStates[TId]->ICVState.*Var;
}

-uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
-uint32_t TId = mapping::getThreadIdInBlock();
-if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
-return ThreadStates[TId]->ICVState.*Var;
-return TeamState.ICVState.*Var;
-}
-uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
-uint64_t TId = mapping::getThreadIdInBlock();
+template <typename IntTy> IntTy &lookupImpl(IntTy ICVStateTy::*Var) {
+IntTy TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
return ThreadStates[TId]->ICVState.*Var;
return TeamState.ICVState.*Var;
@@ -330,27 +324,27 @@ uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) {
switch (Kind) {
case state::VK_NThreads:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::NThreadsVar);
+return lookupImpl<uint32_t>(&ICVStateTy::NThreadsVar);
return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
case state::VK_Level:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::LevelVar);
+return lookupImpl<uint32_t>(&ICVStateTy::LevelVar);
return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
case state::VK_ActiveLevel:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::ActiveLevelVar);
+return lookupImpl<uint32_t>(&ICVStateTy::ActiveLevelVar);
return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
case state::VK_MaxActiveLevels:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar);
+return lookupImpl<uint32_t>(&ICVStateTy::MaxActiveLevelsVar);
return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
case state::VK_RunSched:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::RunSchedVar);
+return lookupImpl<uint32_t>(&ICVStateTy::RunSchedVar);
return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
case state::VK_RunSchedChunk:
if (IsReadonly)
-return lookup32Impl(&ICVStateTy::RunSchedChunkVar);
+return lookupImpl<uint32_t>(&ICVStateTy::RunSchedChunkVar);
return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
case state::VK_ParallelTeamSize:
return TeamState.ParallelTeamSize;
9 changes: 6 additions & 3 deletions openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -139,6 +139,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
numberOfActiveOMPThreads);
break;
}
+[[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_balanced_chunk: {
if (chunk > 0) {
@@ -157,6 +158,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
ub = oldUb;
break;
}
+[[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
@@ -168,8 +170,9 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
break;
-} // note: if chunk <=0, use nochunk
-}
+}
+[[fallthrough]];
+} // note: if chunk <=0, use nochunk
case kmp_sched_distr_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
@@ -341,7 +344,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
uint32_t change = utils::popc(active);
__kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
unsigned int rank = utils::popc(active & lane_mask_lt);
-uint64_t warp_res;
+uint64_t warp_res = 0;
if (rank == 0) {
warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
}
