diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 1d33bfc1a0be9..277fad29fd24f 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -99,7 +99,7 @@ namespace hsa_utils { /// Iterate elements using an HSA iterate function. Do not use this function /// directly but the specialized ones below instead. template -hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) { +static hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) { auto L = [](ElemTy Elem, void *Data) -> hsa_status_t { CallbackTy *Unwrapped = static_cast(Data); return (*Unwrapped)(Elem); @@ -111,7 +111,8 @@ hsa_status_t iterate(IterFuncTy Func, CallbackTy Cb) { /// use this function directly but the specialized ones below instead. template -hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) { +static hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, + CallbackTy Cb) { auto L = [](ElemTy Elem, void *Data) -> hsa_status_t { CallbackTy *Unwrapped = static_cast(Data); return (*Unwrapped)(Elem); @@ -123,7 +124,8 @@ hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) { /// use this function directly but the specialized ones below instead. template -hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) { +static hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, + CallbackTy Cb) { auto L = [](Elem1Ty Elem1, Elem2Ty Elem2, void *Data) -> hsa_status_t { CallbackTy *Unwrapped = static_cast(Data); return (*Unwrapped)(Elem1, Elem2); @@ -132,21 +134,21 @@ hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) { } /// Iterate agents. -template Error iterateAgents(CallbackTy Callback) { +template static Error iterateAgents(CallbackTy Callback) { hsa_status_t Status = iterate(hsa_iterate_agents, Callback); return Plugin::check(Status, "error in hsa_iterate_agents: %s"); } /// Iterate ISAs of an agent. template -Error iterateAgentISAs(hsa_agent_t Agent, CallbackTy Cb) { +static Error iterateAgentISAs(hsa_agent_t Agent, CallbackTy Cb) { hsa_status_t Status = iterate(hsa_agent_iterate_isas, Agent, Cb); return Plugin::check(Status, "error in hsa_agent_iterate_isas: %s"); } /// Iterate memory pools of an agent. template -Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) { +static Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) { hsa_status_t Status = iterate( hsa_amd_agent_iterate_memory_pools, Agent, Cb); return Plugin::check(Status, @@ -155,10 +157,12 @@ Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) { /// Dispatches an asynchronous memory copy. /// Enables different SDMA engines for the dispatch in a round-robin fashion. -Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, - const void *Src, hsa_agent_t SrcAgent, size_t Size, - uint32_t NumDepSignals, const hsa_signal_t *DepSignals, - hsa_signal_t CompletionSignal) { +static Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, + hsa_agent_t DstAgent, const void *Src, + hsa_agent_t SrcAgent, size_t Size, + uint32_t NumDepSignals, + const hsa_signal_t *DepSignals, + hsa_signal_t CompletionSignal) { if (!UseMultipleSdmaEngines) { hsa_status_t S = hsa_amd_memory_async_copy(Dst, DstAgent, Src, SrcAgent, Size, @@ -193,8 +197,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, #endif } -Error getTargetTripleAndFeatures(hsa_agent_t Agent, - SmallVector> &Targets) { +static Error getTargetTripleAndFeatures(hsa_agent_t Agent, + SmallVector> &Targets) { auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -1222,7 +1226,7 @@ struct AMDGPUStreamTy { assert(Args->Dst && "Invalid destination buffer"); assert(Args->Src && "Invalid source buffer"); - auto BasePtr = Args->Dst; + auto *BasePtr = Args->Dst; for (size_t I = 0; I < Args->NumTimes; I++) { std::memcpy(BasePtr, Args->Src, Args->Size); BasePtr = reinterpret_cast(BasePtr) + Args->Size; @@ -2673,11 +2677,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { // hsa_amd_memory_fill doesn't signal completion using a signal, so use // the existing host callback logic to handle that instead return Stream->pushHostCallback(Fill, Args); - } else { - // If there is no pending work, do the fill synchronously - auto Status = hsa_amd_memory_fill(TgtPtr, Pattern, Size / 4); - return Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n"); } + // If there is no pending work, do the fill synchronously + auto Status = hsa_amd_memory_fill(TgtPtr, Pattern, Size / 4); + return Plugin::check(Status, "error in hsa_amd_memory_fill: %s\n"); } // Slow case; allocate an appropriate memory size and enqueue copies @@ -2759,7 +2762,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } Expected hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { - auto Stream = AsyncInfo.getQueueAs(); + auto *Stream = AsyncInfo.getQueueAs(); if (!Stream) return false; @@ -2772,7 +2775,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Expected isEventCompleteImpl(void *EventPtr, AsyncInfoWrapperTy &AsyncInfo) override { AMDGPUEventTy *Event = reinterpret_cast(EventPtr); - auto Stream = AsyncInfo.getQueueAs(); + auto *Stream = AsyncInfo.getQueueAs(); return Stream && Stream->isEventComplete(*Event); } @@ -2829,7 +2832,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { hsa_device_type_t DevType; Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType); if (Status == HSA_STATUS_SUCCESS) { - switch (DevType) { + switch (static_cast(DevType)) { case HSA_DEVICE_TYPE_CPU: TmpCharPtr = "CPU"; break; @@ -3746,8 +3749,8 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, return Plugin::success(); // General Info - auto NumGroups = NumBlocks; - auto ThreadsPerGroup = NumThreads; + auto *NumGroups = NumBlocks; + auto *ThreadsPerGroup = NumThreads; // Kernel Arguments Info auto ArgNum = KernelArgs.NumArgs; diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index d973c2d4dd320..fdd470fcd8113 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -160,15 +160,15 @@ struct CUDAKernelTy : public GenericKernelTy { /// Return maximum block size for maximum occupancy Expected maxGroupSize(GenericDeviceTy &, uint64_t DynamicMemSize) const override { - int minGridSize; - int maxBlockSize; + int MinGridSize; + int MaxBlockSize; auto Res = cuOccupancyMaxPotentialBlockSize( - &minGridSize, &maxBlockSize, Func, NULL, DynamicMemSize, INT_MAX); + &MinGridSize, &MaxBlockSize, Func, NULL, DynamicMemSize, INT_MAX); if (auto Err = Plugin::check( Res, "error in cuOccupancyMaxPotentialBlockSize: %s")) { return Err; } - return maxBlockSize; + return MaxBlockSize; } private: diff --git a/openmp/device/include/State.h b/openmp/device/include/State.h index db396dae6e445..cd6013780a49c 100644 --- a/openmp/device/include/State.h +++ b/openmp/device/include/State.h @@ -327,6 +327,8 @@ template struct ValueRAII { Ty Val; bool Active; }; +template +ValueRAII(VTy &, Ty, Ty, bool, IdentTy *, bool) -> ValueRAII; /// TODO inline state::Value RunSchedChunk; diff --git a/openmp/device/src/Misc.cpp b/openmp/device/src/Misc.cpp index a89f8b2a74531..563f674d166e5 100644 --- a/openmp/device/src/Misc.cpp +++ b/openmp/device/src/Misc.cpp @@ -23,7 +23,7 @@ namespace impl { /// Lookup a device-side function using a host pointer /p HstPtr using the table /// provided by the device plugin. The table is an ordered pair of host and /// device pointers sorted on the value of the host pointer. -void *indirectCallLookup(void *HstPtr) { +static void *indirectCallLookup(void *HstPtr) { if (!HstPtr) return nullptr; @@ -114,6 +114,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) { case omp_high_bw_mem_alloc: case omp_low_lat_mem_alloc: free(ptr); + return; case omp_null_allocator: default: return; diff --git a/openmp/device/src/Synchronization.cpp b/openmp/device/src/Synchronization.cpp index 2f1ed34a3f6d6..501dc4a291ed1 100644 --- a/openmp/device/src/Synchronization.cpp +++ b/openmp/device/src/Synchronization.cpp @@ -57,8 +57,6 @@ uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering, ScopeSwitch(ORDER) switch (Ordering) { - default: - __builtin_unreachable(); Case(atomic::relaxed); Case(atomic::acquire); Case(atomic::release); diff --git a/openmp/tools/omptest/src/OmptAssertEvent.cpp b/openmp/tools/omptest/src/OmptAssertEvent.cpp index bbf2d7cd4a10f..a5a2e7969e980 100644 --- a/openmp/tools/omptest/src/OmptAssertEvent.cpp +++ b/openmp/tools/omptest/src/OmptAssertEvent.cpp @@ -24,9 +24,6 @@ const char *omptest::to_string(ObserveState State) { return "Always"; case ObserveState::Never: return "Never"; - default: - assert(false && "Requested string representation for unknown ObserveState"); - return "UNKNOWN"; } }