Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 25 additions & 22 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ namespace hsa_utils {
/// Iterate elements using an HSA iterate function. Do not use this function
/// directly but the specialized ones below instead.
template <typename ElemTy, typename IterFuncTy, typename CallbackTy>
hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) {
static hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) {
auto L = [](ElemTy Elem, void *Data) -> hsa_status_t {
CallbackTy *Unwrapped = static_cast<CallbackTy *>(Data);
return (*Unwrapped)(Elem);
Expand All @@ -111,7 +111,8 @@ hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) {
/// use this function directly but the specialized ones below instead.
template <typename ElemTy, typename IterFuncTy, typename IterFuncArgTy,
typename CallbackTy>
hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) {
static hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg,
CallbackTy Cb) {
auto L = [](ElemTy Elem, void *Data) -> hsa_status_t {
CallbackTy *Unwrapped = static_cast<CallbackTy *>(Data);
return (*Unwrapped)(Elem);
Expand All @@ -123,7 +124,8 @@ hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) {
/// use this function directly but the specialized ones below instead.
template <typename Elem1Ty, typename Elem2Ty, typename IterFuncTy,
typename IterFuncArgTy, typename CallbackTy>
hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) {
static hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg,
CallbackTy Cb) {
auto L = [](Elem1Ty Elem1, Elem2Ty Elem2, void *Data) -> hsa_status_t {
CallbackTy *Unwrapped = static_cast<CallbackTy *>(Data);
return (*Unwrapped)(Elem1, Elem2);
Expand All @@ -132,21 +134,21 @@ hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) {
}

/// Iterate agents.
template <typename CallbackTy> Error iterateAgents(CallbackTy Callback) {
template <typename CallbackTy> static Error iterateAgents(CallbackTy Callback) {
hsa_status_t Status = iterate<hsa_agent_t>(hsa_iterate_agents, Callback);
return Plugin::check(Status, "error in hsa_iterate_agents: %s");
}

/// Iterate ISAs of an agent.
template <typename CallbackTy>
Error iterateAgentISAs(hsa_agent_t Agent, CallbackTy Cb) {
static Error iterateAgentISAs(hsa_agent_t Agent, CallbackTy Cb) {
hsa_status_t Status = iterate<hsa_isa_t>(hsa_agent_iterate_isas, Agent, Cb);
return Plugin::check(Status, "error in hsa_agent_iterate_isas: %s");
}

/// Iterate memory pools of an agent.
template <typename CallbackTy>
Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) {
static Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) {
hsa_status_t Status = iterate<hsa_amd_memory_pool_t>(
hsa_amd_agent_iterate_memory_pools, Agent, Cb);
return Plugin::check(Status,
Expand All @@ -155,10 +157,12 @@ Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) {

/// Dispatches an asynchronous memory copy.
/// Enables different SDMA engines for the dispatch in a round-robin fashion.
Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
const void *Src, hsa_agent_t SrcAgent, size_t Size,
uint32_t NumDepSignals, const hsa_signal_t *DepSignals,
hsa_signal_t CompletionSignal) {
static Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst,
hsa_agent_t DstAgent, const void *Src,
hsa_agent_t SrcAgent, size_t Size,
uint32_t NumDepSignals,
const hsa_signal_t *DepSignals,
hsa_signal_t CompletionSignal) {
if (!UseMultipleSdmaEngines) {
hsa_status_t S =
hsa_amd_memory_async_copy(Dst, DstAgent, Src, SrcAgent, Size,
Expand Down Expand Up @@ -193,8 +197,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}

Error getTargetTripleAndFeatures(hsa_agent_t Agent,
SmallVector<SmallString<32>> &Targets) {
static Error getTargetTripleAndFeatures(hsa_agent_t Agent,
SmallVector<SmallString<32>> &Targets) {
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
Expand Down Expand Up @@ -1222,7 +1226,7 @@ struct AMDGPUStreamTy {
assert(Args->Dst && "Invalid destination buffer");
assert(Args->Src && "Invalid source buffer");

auto BasePtr = Args->Dst;
auto *BasePtr = Args->Dst;
for (size_t I = 0; I < Args->NumTimes; I++) {
std::memcpy(BasePtr, Args->Src, Args->Size);
BasePtr = reinterpret_cast<uint8_t *>(BasePtr) + Args->Size;
Expand Down Expand Up @@ -2673,11 +2677,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// hsa_amd_memory_fill doesn't signal completion using a signal, so use
// the existing host callback logic to handle that instead
return Stream->pushHostCallback(Fill, Args);
} else {
// If there is no pending work, do the fill synchronously
auto Status = hsa_amd_memory_fill(TgtPtr, Pattern, Size / 4);
return Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n");
}
// If there is no pending work, do the fill synchronously
auto Status = hsa_amd_memory_fill(TgtPtr, Pattern, Size / 4);
return Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n");
}

// Slow case; allocate an appropriate memory size and enqueue copies
Expand Down Expand Up @@ -2759,7 +2762,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}

Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
auto *Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
if (!Stream)
return false;

Expand All @@ -2772,7 +2775,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Expected<bool> isEventCompleteImpl(void *EventPtr,
AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
auto *Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
return Stream && Stream->isEventComplete(*Event);
}

Expand Down Expand Up @@ -2829,7 +2832,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
hsa_device_type_t DevType;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
if (Status == HSA_STATUS_SUCCESS) {
switch (DevType) {
switch (static_cast<int>(DevType)) {
case HSA_DEVICE_TYPE_CPU:
TmpCharPtr = "CPU";
break;
Expand Down Expand Up @@ -3746,8 +3749,8 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
return Plugin::success();

// General Info
auto NumGroups = NumBlocks;
auto ThreadsPerGroup = NumThreads;
auto *NumGroups = NumBlocks;
auto *ThreadsPerGroup = NumThreads;

// Kernel Arguments Info
auto ArgNum = KernelArgs.NumArgs;
Expand Down
8 changes: 4 additions & 4 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,15 @@ struct CUDAKernelTy : public GenericKernelTy {
/// Return the maximum block size that achieves maximum occupancy.
Expected<uint64_t> maxGroupSize(GenericDeviceTy &,
uint64_t DynamicMemSize) const override {
int minGridSize;
int maxBlockSize;
int MinGridSize;
int MaxBlockSize;
auto Res = cuOccupancyMaxPotentialBlockSize(
&minGridSize, &maxBlockSize, Func, NULL, DynamicMemSize, INT_MAX);
&MinGridSize, &MaxBlockSize, Func, NULL, DynamicMemSize, INT_MAX);
if (auto Err = Plugin::check(
Res, "error in cuOccupancyMaxPotentialBlockSize: %s")) {
return Err;
}
return maxBlockSize;
return MaxBlockSize;
}

private:
Expand Down
2 changes: 2 additions & 0 deletions openmp/device/include/State.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ template <typename VTy, typename Ty> struct ValueRAII {
Ty Val;
bool Active;
};
/// Deduction guide for ValueRAII: constructing a ValueRAII from arguments of
/// the form (VTy &, Ty, Ty, bool, IdentTy *, bool) without explicit template
/// arguments deduces ValueRAII<VTy, Ty>.
template <typename VTy, typename Ty>
ValueRAII(VTy &, Ty, Ty, bool, IdentTy *, bool) -> ValueRAII<VTy, Ty>;

/// TODO
inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;
Expand Down
3 changes: 2 additions & 1 deletion openmp/device/src/Misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace impl {
/// Look up a device-side function for the host pointer \p HstPtr using the table
/// provided by the device plugin. The table is an ordered pair of host and
/// device pointers sorted on the value of the host pointer.
void *indirectCallLookup(void *HstPtr) {
static void *indirectCallLookup(void *HstPtr) {
if (!HstPtr)
return nullptr;

Expand Down Expand Up @@ -114,6 +114,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
case omp_high_bw_mem_alloc:
case omp_low_lat_mem_alloc:
free(ptr);
return;
case omp_null_allocator:
default:
return;
Expand Down
2 changes: 0 additions & 2 deletions openmp/device/src/Synchronization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
ScopeSwitch(ORDER)

switch (Ordering) {
default:
__builtin_unreachable();
Case(atomic::relaxed);
Case(atomic::acquire);
Case(atomic::release);
Expand Down
3 changes: 0 additions & 3 deletions openmp/tools/omptest/src/OmptAssertEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ const char *omptest::to_string(ObserveState State) {
return "Always";
case ObserveState::Never:
return "Never";
default:
assert(false && "Requested string representation for unknown ObserveState");
return "UNKNOWN";
}
}

Expand Down
Loading