diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index e39c2e11390e13..41fa9f5345aa83 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -306,9 +306,6 @@ class CGOpenMPRuntime {
   CodeGenModule &CGM;
   StringRef FirstSeparator, Separator;
 
-  /// An OpenMP-IR-Builder instance.
-  llvm::OpenMPIRBuilder OMPBuilder;
-
   /// Constructor allowing to redefine the name separator for the variables.
   explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                            StringRef Separator);
@@ -389,6 +386,8 @@ class CGOpenMPRuntime {
   llvm::Value *getCriticalRegionLock(StringRef CriticalName);
 
 private:
+  /// An OpenMP-IR-Builder instance.
+  llvm::OpenMPIRBuilder OMPBuilder;
   /// Map for SourceLocation and OpenMP runtime library debug locations.
   typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
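Note on the hunk above: with OMPBuilder moved into the private section of CGOpenMPRuntime, subclasses such as CGOpenMPRuntimeGPU can no longer reach it, which is why every call site in the GPU file below switches from OMPBuilder.getOrCreateRuntimeFunction(...) back to a local createNVPTXRuntimeFunction(...). A standalone sketch of the OMPIRBuilder idiom being retired on the GPU path (demo code compiled against LLVM headers of this era, not part of the patch):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("demo", Ctx);
      llvm::OpenMPIRBuilder OMPBuilder(M);
      OMPBuilder.initialize();
      // The callee's type is looked up from the OMPKinds.def table; no
      // hand-written FunctionType is needed at the call site.
      llvm::FunctionCallee Barrier = OMPBuilder.getOrCreateRuntimeFunction(
          M, llvm::omp::OMPRTL___kmpc_barrier);
      (void)Barrier;
      return 0;
    }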
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index dbd24d33cc376b..d9ef6c2a107894 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -28,6 +28,96 @@ using namespace CodeGen;
 using namespace llvm::omp;
 
 namespace {
+enum OpenMPRTLFunctionNVPTX {
+  /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+  /// int16_t RequiresOMPRuntime);
+  OMPRTL_NVPTX__kmpc_kernel_init,
+  /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+  OMPRTL_NVPTX__kmpc_kernel_deinit,
+  /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+  /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+  /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
+  /// Call to void __kmpc_kernel_prepare_parallel(void
+  /// *outlined_function);
+  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
+  /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
+  OMPRTL_NVPTX__kmpc_kernel_parallel,
+  /// Call to void __kmpc_kernel_end_parallel();
+  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
+  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_serialized_parallel,
+  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+  /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
+  /// int16_t lane_offset, int16_t warp_size);
+  OMPRTL_NVPTX__kmpc_shuffle_int32,
+  /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
+  /// int16_t lane_offset, int16_t warp_size);
+  OMPRTL_NVPTX__kmpc_shuffle_int64,
+  /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+  /// lane_offset, int16_t shortCircuit),
+  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+  OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
+  /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+  /// global_tid, void *global_buffer, int32_t num_of_records, void*
+  /// reduce_data,
+  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+  /// lane_offset, int16_t shortCircuit),
+  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+  /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+  /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+  /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+  /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+  /// *buffer, int idx, void *reduce_data));
+  OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
+  /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+  OMPRTL_NVPTX__kmpc_end_reduce_nowait,
+  /// Call to void __kmpc_data_sharing_init_stack();
+  OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
+  /// Call to void __kmpc_data_sharing_init_stack_spmd();
+  OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
+  /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
+  /// int16_t UseSharedMemory);
+  OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+  /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
+  /// UseSharedMemory);
+  OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+  /// Call to void __kmpc_data_sharing_pop_stack(void *a);
+  OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
+  /// Call to void __kmpc_begin_sharing_variables(void ***args,
+  /// size_t n_args);
+  OMPRTL_NVPTX__kmpc_begin_sharing_variables,
+  /// Call to void __kmpc_end_sharing_variables();
+  OMPRTL_NVPTX__kmpc_end_sharing_variables,
+  /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
+  OMPRTL_NVPTX__kmpc_get_shared_variables,
+  /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL_NVPTX__kmpc_parallel_level,
+  /// Call to int8_t __kmpc_is_spmd_exec_mode();
+  OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+  /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+  /// const void *buf, size_t size, int16_t is_shared, const void **res);
+  OMPRTL_NVPTX__kmpc_get_team_static_memory,
+  /// Call to void __kmpc_restore_team_static_memory(int16_t
+  /// isSPMDExecutionMode, int16_t is_shared);
+  OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+  OMPRTL__kmpc_barrier,
+  /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+  /// global_tid);
+  OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
+};
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
 class NVPTXActionTy final : public PrePostActionTy {
@@ -1153,13 +1243,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {getThreadLimit(CGF),
                          Bld.getInt16(/*RequiresOMPRuntime=*/1)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_kernel_init),
-                      Args);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
 
   // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-      CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack));
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
 
   emitGenericVarsProlog(CGF, WST.Loc);
 }
@@ -1182,9 +1272,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
   // Signal termination condition.
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_kernel_deinit),
-                      Args);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
   // Barrier to terminate worker threads.
   syncCTAThreads(CGF);
   // Master thread jumps to exit point.
@@ -1258,14 +1347,13 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader(
       /*RequiresOMPRuntime=*/
       Bld.getInt16(RequiresFullRuntime ? 1 : 0),
       /*RequiresDataSharing=*/Bld.getInt16(0)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init),
-                      Args);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
 
   if (RequiresFullRuntime) {
     // For data sharing, we need to initialize the stack.
-    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-        CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd));
+    CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+        OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
   }
 
   CGF.EmitBranch(ExecuteBB);
@@ -1291,9 +1379,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF,
   // DeInitialize the OMP state in the runtime; called by all active threads.
   llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
                          CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2),
-                      Args);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
 
   CGF.EmitBranch(EST.ExitBB);
   CGF.EmitBlock(EST.ExitBB);
@@ -1327,7 +1415,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) {
 }
 
 void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
-                                        WorkerFunctionState &WST) {
+                                          WorkerFunctionState &WST) {
   //
   // The workers enter this loop and wait for parallel work from the master.
   // When the master encounters a parallel region it sets up the work + variable
@@ -1362,10 +1450,8 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {WorkFn.getPointer()};
-  llvm::Value *Ret =
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_kernel_parallel),
-                          Args);
+  llvm::Value *Ret = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
   Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
 
   // On termination condition (workid == 0), exit loop.
@@ -1430,9 +1516,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
 
   // Signal end of parallel region.
   CGF.EmitBlock(TerminateBB);
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel),
-                      llvm::None);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
+      llvm::None);
   CGF.EmitBranch(BarrierBB);
 
   // All active and inactive workers wait at a barrier after parallel region.
@@ -1447,6 +1533,328 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
   clearLocThreadIdInsertPt(CGF);
 }
 
+/// Returns specified OpenMP runtime function for the current OpenMP
+/// implementation. Specialized for the NVPTX device.
+/// \param Function OpenMP runtime function.
+/// \return Specified function.
+llvm::FunctionCallee
+CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) {
+  llvm::FunctionCallee RTLFn = nullptr;
+  switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
+  case OMPRTL_NVPTX__kmpc_kernel_init: {
+    // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
+    // RequiresOMPRuntime);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
+    // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
+    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+    // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+    // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
+    /// Build void __kmpc_kernel_prepare_parallel(
+    /// void *outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
+    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
+    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
+    auto *FnTy =
+        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
+    /// Build void __kmpc_kernel_end_parallel();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
+    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
+    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_shuffle_int32: {
+    // Build int32_t __kmpc_shuffle_int32(int32_t element,
+    // int16_t lane_offset, int16_t warp_size);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_shuffle_int64: {
+    // Build int64_t __kmpc_shuffle_int64(int64_t element,
+    // int16_t lane_offset, int16_t warp_size);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
+    // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
+    // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
+    // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
+    // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
+    // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+                                             CGM.Int16Ty, CGM.Int16Ty};
+    auto *ShuffleReduceFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+    auto *InterWarpCopyFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty,
+                                CGM.SizeTy,
+                                CGM.VoidPtrTy,
+                                ShuffleReduceFnTy->getPointerTo(),
+                                InterWarpCopyFnTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+    // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
+    // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+    // global_tid, void *global_buffer, int32_t num_of_records, void*
+    // reduce_data,
+    // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+    // lane_offset, int16_t shortCircuit),
+    // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+    // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+    // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+    // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+    // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+    // *buffer, int idx, void *reduce_data));
+    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+                                             CGM.Int16Ty, CGM.Int16Ty};
+    auto *ShuffleReduceFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+    auto *InterWarpCopyFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
+                                          CGM.VoidPtrTy};
+    auto *GlobalListFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
+                                /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+                                CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                ShuffleReduceFnTy->getPointerTo(),
+                                InterWarpCopyFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo(),
+                                GlobalListFnTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
+    /// Build void __kmpc_data_sharing_init_stack();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
+    /// Build void __kmpc_data_sharing_init_stack_spmd();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+    // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
+    // int16_t UseSharedMemory);
+    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
+    // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
+    // UseSharedMemory);
+    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
+    // Build void __kmpc_data_sharing_pop_stack(void *a);
+    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy,
+                                      /*Name=*/"__kmpc_data_sharing_pop_stack");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
+    /// Build void __kmpc_begin_sharing_variables(void ***args,
+    /// size_t n_args);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
+    /// Build void __kmpc_end_sharing_variables();
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_get_shared_variables: {
+    /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_parallel_level: {
+    // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
+    // Build int8_t __kmpc_is_spmd_exec_mode();
+    auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+    // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+    // const void *buf, size_t size, int16_t is_shared, const void **res);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
+                                CGM.Int16Ty, CGM.VoidPtrPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+    // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
+    // int16_t is_shared);
+    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+    break;
+  }
+  case OMPRTL__kmpc_barrier: {
+    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn =
+        CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+    break;
+  }
+  case OMPRTL__kmpc_barrier_simple_spmd: {
+    // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(
+        FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+    // Build int32_t __kmpc_warp_active_thread_mask(void);
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(
+        FnTy, "__kmpc_warp_active_thread_mask");
+    break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+    // Build void __kmpc_syncwarp(kmp_int32 Mask);
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp");
+    break;
+  }
+  }
+  return RTLFn;
+}
+
 void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
                                             llvm::Constant *Addr,
                                             uint64_t Size, int32_t,
@@ -1749,14 +2157,12 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
     llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
     llvm::Value *ThreadID = getThreadID(CGF, Loc);
     llvm::Value *PL = CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
-                                              OMPRTL___kmpc_parallel_level),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
        {RTLoc, ThreadID});
     IsTTD = Bld.CreateIsNull(PL);
   }
-  llvm::Value *IsSPMD = Bld.CreateIsNotNull(
-      CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-          CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
+  llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
   Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
   // There is no need to emit line number for unconditional branch.
   (void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -1790,8 +2196,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
         llvm::Value *GlobalRecordSizeArg[] = {
             Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
         llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
+            createNVPTXRuntimeFunction(
+                OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
             GlobalRecordSizeArg);
         GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
             GlobalRecValue, GlobalRecPtrTy);
@@ -1853,10 +2259,9 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
                 CGM.Int16Ty,
                 getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
             StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
-        CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(), OMPRTL___kmpc_get_team_static_memory),
-            GlobalRecordSizeArg);
+        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                                OMPRTL_NVPTX__kmpc_get_team_static_memory),
+                            GlobalRecordSizeArg);
         GlobalizedRecords.back().Buffer = StaticGlobalized;
         GlobalizedRecords.back().RecSize = RecSize;
         GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
@@ -1883,10 +2288,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
             llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
            CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
         llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(),
-                IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack
-                              : OMPRTL___kmpc_data_sharing_coalesced_push_stack),
+            createNVPTXRuntimeFunction(
+                IsInTTDRegion
+                    ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
+                    : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
             GlobalRecordSizeArg);
         GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
             GlobalRecValue, GlobalRecPtrTy);
@@ -1985,8 +2390,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
     llvm::Value *GlobalRecordSizeArg[] = {
         Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
     llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
         GlobalRecordSizeArg);
     llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
         GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@@ -2014,8 +2419,7 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
      for (llvm::Value *Addr :
           llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
        CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
+            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
            Addr);
      }
      if (I->getSecond().GlobalRecordAddr) {
@@ -2030,8 +2434,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
        (void)ApplyDebugLocation::CreateEmpty(CGF);
        CGF.EmitBlock(NonSPMDBB);
        CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
+            createNVPTXRuntimeFunction(
+                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
            CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
        CGF.EmitBlock(ExitBB);
      } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
@@ -2052,15 +2456,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
                getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
            IsInSharedMemory};
        CGF.EmitRuntimeCall(
-            OMPBuilder.getOrCreateRuntimeFunction(
-                CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory),
+            createNVPTXRuntimeFunction(
+                OMPRTL_NVPTX__kmpc_restore_team_static_memory),
            Args);
      }
    } else {
-      CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
-          I->getSecond().GlobalRecordAddr);
+      CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                              OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+                          I->getSecond().GlobalRecordAddr);
    }
  }
 }
@@ -2132,11 +2535,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
     llvm::Value *Args[] = {RTLoc, ThreadID};
 
     NVPTXActionTy Action(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
         Args,
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
         Args);
     RCG.setAction(Action);
     RCG(CGF);
@@ -2152,8 +2553,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
     // Prepare for parallel region. Indicate the outlined function.
     llvm::Value *Args[] = {ID};
     CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
         Args);
 
     // Create a private scope that will globalize the arguments
@@ -2170,10 +2570,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
       llvm::Value *DataSharingArgs[] = {
           SharedArgsPtr,
           llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
-      CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables),
-          DataSharingArgs);
+      CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+                              OMPRTL_NVPTX__kmpc_begin_sharing_variables),
+                          DataSharingArgs);
 
       // Store variable address in a list of references to pass to workers.
       unsigned Idx = 0;
@@ -2207,8 +2606,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
     syncCTAThreads(CGF);
 
     if (!CapturedVars.empty())
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-          CGM.getModule(), OMPRTL___kmpc_end_sharing_variables));
+      CGF.EmitRuntimeCall(
+          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
 
     // Remember for post-processing in worker loop.
     Work.emplace_back(WFn);
@@ -2232,9 +2631,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
     llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
     llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
     llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
-    llvm::Value *IsSPMD = Bld.CreateIsNotNull(
-        CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
+    llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
     Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
     // There is no need to emit line number for unconditional branch.
     (void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -2242,8 +2640,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
     llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
     llvm::Value *ThreadID = getThreadID(CGF, Loc);
     llvm::Value *PL = CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
-                                              OMPRTL___kmpc_parallel_level),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
         {RTLoc, ThreadID});
     llvm::Value *Res = Bld.CreateIsNotNull(PL);
     Bld.CreateCondBr(Res, SeqBB, MasterBB);
@@ -2307,11 +2704,9 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall(
     llvm::Value *Args[] = {RTLoc, ThreadID};
 
     NVPTXActionTy Action(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
         Args,
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
         Args);
     RCG.setAction(Action);
     RCG(CGF);
@@ -2341,9 +2736,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {
       llvm::ConstantPointerNull::get(
          cast<llvm::PointerType>(getIdentTyPointerTy())),
       llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),
-                      Args);
+  llvm::CallInst *Call = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+  Call->setConvergent();
 }
 
 void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
@@ -2357,10 +2752,9 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
   unsigned Flags = getDefaultFlagsForBarriers(Kind);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                          getThreadID(CGF, Loc)};
-
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_barrier),
-                      Args);
+  llvm::CallInst *Call = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+  Call->setConvergent();
 }
 
 void CGOpenMPRuntimeGPU::emitCriticalRegion(
@@ -2376,8 +2770,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion(
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
 
   // Get the mask of active threads in the warp.
-  llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-      CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
@@ -2419,9 +2813,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion(
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
   // Reconverge active threads in the warp.
-  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                                CGM.getModule(), OMPRTL___kmpc_syncwarp),
-                            Mask);
+  (void)CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
 
   llvm::Value *IncCounterVal =
       CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
@@ -2471,15 +2864,14 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
   CGBuilderTy &Bld = CGF.Builder;
   CGOpenMPRuntimeGPU &RT =
       *(static_cast<CGOpenMPRuntimeGPU *>(&CGM.getOpenMPRuntime()));
-  llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder();
 
   CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
   assert(Size.getQuantity() <= 8 &&
          "Unsupported bitwidth in shuffle instruction.");
 
-  RuntimeFunction ShuffleFn = Size.getQuantity() <= 4
-                                  ? OMPRTL___kmpc_shuffle_int32
-                                  : OMPRTL___kmpc_shuffle_int64;
+  OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
+                                         ? OMPRTL_NVPTX__kmpc_shuffle_int32
+                                         : OMPRTL_NVPTX__kmpc_shuffle_int64;
 
   // Cast all types to 32- or 64-bit values before calling shuffle routines.
   QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
@@ -2489,8 +2881,7 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
       Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
 
   llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
-      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn),
-      {ElemCast, Offset, WarpSize});
+      RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
 
   return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
 }
@@ -4000,8 +4391,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
         InterWarpCopyFn};
 
     Res = CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2),
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
         Args);
   } else {
     assert(TeamsReduction && "expected teams reduction.");
@@ -4050,8 +4441,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
         BufferToGlobalRedFn};
 
     Res = CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2),
+        createNVPTXRuntimeFunction(
+            OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
         Args);
   }
 
@@ -4086,8 +4477,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
     RegionCodeGenTy RCG(CodeGen);
     NVPTXActionTy Action(
         nullptr, llvm::None,
-        OMPBuilder.getOrCreateRuntimeFunction(
-            CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
         EndArgs);
     RCG.setAction(Action);
     RCG(CGF);
@@ -4098,7 +4488,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
 
 const VarDecl *
 CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,
-                                       const VarDecl *NativeParam) const {
+                                         const VarDecl *NativeParam) const {
   if (!NativeParam->getType()->isReferenceType())
     return NativeParam;
   QualType ArgType = NativeParam->getType();
@@ -4248,9 +4638,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
       CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
   llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
   llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_get_shared_variables),
-                      DataSharingArgs);
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
+      DataSharingArgs);
 
   // Retrieve the shared variables from the list of references returned
   // by the runtime. Pass the variables to the outlined function.
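The createNVPTXRuntimeFunction() switch reintroduced above is an enum-to-declaration factory: each case builds the llvm::FunctionType by hand and asks the module for a matching declaration. A reduced, self-contained sketch of the same pattern (illustrative names, not the clang implementation):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    namespace {
    enum DemoRTLFunction { Demo__kmpc_syncwarp };
    } // namespace

    static llvm::FunctionCallee createDemoRuntimeFunction(llvm::Module &M,
                                                          unsigned Function) {
      llvm::FunctionCallee RTLFn = nullptr;
      switch (static_cast<DemoRTLFunction>(Function)) {
      case Demo__kmpc_syncwarp: {
        // Build void __kmpc_syncwarp(kmp_int32 Mask); the type must be kept
        // in sync with the device runtime by hand, which is the maintenance
        // cost this revert accepts in exchange for not depending on the
        // OMPKinds.def table for device-only entry points.
        auto &Ctx = M.getContext();
        auto *FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                             {llvm::Type::getInt32Ty(Ctx)},
                                             /*isVarArg=*/false);
        RTLFn = M.getOrInsertFunction("__kmpc_syncwarp", FnTy);
        break;
      }
      }
      return RTLFn;
    }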
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 088ed2830fb81d..19085b582f5a0e 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1068,6 +1068,16 @@ class CodeGenModule : public CodeGenTypeCache {
                          llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
                          bool Local = false, bool AssumeConvergent = false);
 
+  /// Create or return a runtime function declaration with the specified type
+  /// and name. This will automatically add the convergent attribute to the
+  /// function declaration.
+  llvm::FunctionCallee CreateConvergentRuntimeFunction(
+      llvm::FunctionType *Ty, StringRef Name,
+      llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
+      bool Local = false) {
+    return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true);
+  }
+
   /// Create a new runtime global variable with the specified type and name.
   llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name);
 
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index bd9c988d46e7a0..ad25e0d775d12b 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -91,7 +91,7 @@ int bar(int n){
 // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
 //
 // CHECK: [[AWAIT_WORK]]
-// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
+// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]]
 // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@@ -321,10 +321,10 @@ int bar(int n){
 // CHECK: define internal void [[PARALLEL_FN4]](
 // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
 // CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
-// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]]
 // CHECK: ret void
 
-// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]]
+// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]]
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker()
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}(
@@ -377,6 +377,6 @@ int bar(int n){
 // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]]
 // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]]
 
-// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}}
+// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}}
 
 #endif
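A note on the FileCheck patterns above: #[[#CONVERGENT:]] captures the numeric attribute-group id (for example the 0 in #0) at its first match, while the bare #[[#CONVERGENT]] reuses the already-captured value. The updated test therefore ties each barrier call site and the final attributes line to one and the same convergent attribute group instead of re-capturing a fresh number on every line.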
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index ff5e69df326164..e93f836ea3fadf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -220,9 +220,6 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr)
 __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr)
 __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
                     /* kmp_task_t */ VoidPtr)
-__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16)
-__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32)
-__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr)
 
 #undef __OMP_FUNCTION_TYPE
 #undef OMP_FUNCTION_TYPE
@@ -314,6 +311,8 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32)
 __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
           /* Int */ Int32)
 __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
           /* kmp_task_t */ VoidPtr, Int32,
           /* kmp_task_affinity_info_t */ VoidPtr)
@@ -519,42 +518,17 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
 __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
-/// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16)
-__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16)
-__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16)
-__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16)
-__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
-__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
-__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, )
-__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16)
-__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
-          Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr)
-__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32)
-__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
-          VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr,
-          GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr)
-
-__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
 __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, )
-__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
-
-__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16)
+__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
+__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy,
+          Int16)
 __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16)
 __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr)
-__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
-__OMP_RTL(__kmpc_end_sharing_variables, false, Void, )
-__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
-__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32)
-__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
-__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
-          Int16, VoidPtrPtr)
-__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
-__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, )
-__OMP_RTL(__kmpc_syncwarp, false, Void, Int32)
+
+/// Note that device runtime functions (in the following) do not necessarily
+/// need attributes as we expect to see the definitions.
+__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
+__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
 
 __OMP_RTL(__last, false, Void, )
 
@@ -603,8 +577,8 @@ __OMP_ATTRS_SET(DefaultAttrs,
 __OMP_ATTRS_SET(BarrierAttrs,
                 OptimisticAttributes
-                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))
-                    : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)))
+                    ? AttributeSet(EnumAttr(NoUnwind))
+                    : AttributeSet(EnumAttr(NoUnwind)))
 
 __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs,
                 OptimisticAttributes
@@ -676,11 +650,6 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs,
 
 __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
-                ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
-                ParamAttrs())
-__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
 __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
 __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
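For readers unfamiliar with OMPKinds.def: it is an X-macro table, so deleting a __OMP_RTL(...) row removes the function from every consumer that expands the table (the RuntimeFunction enum, type construction, and attribute application alike). A reduced standalone sketch of the technique, with hypothetical names rather than the real table:

    #include <cstdio>

    #define DEMO_RTL_FUNCTIONS(OMP_RTL)                    \
      OMP_RTL(__kmpc_serialized_parallel, false)           \
      OMP_RTL(__kmpc_end_serialized_parallel, false)

    // Expansion 1: an enum listing the known runtime functions.
    enum DemoRTLKind {
    #define OMP_RTL(Name, IsVarArg) RTLK_##Name,
      DEMO_RTL_FUNCTIONS(OMP_RTL)
    #undef OMP_RTL
    };

    // Expansion 2: their printable names, kept in sync automatically.
    static const char *DemoRTLNames[] = {
    #define OMP_RTL(Name, IsVarArg) #Name,
        DEMO_RTL_FUNCTIONS(OMP_RTL)
    #undef OMP_RTL
    };

    int main() {
      std::printf("%s\n", DemoRTLNames[RTLK___kmpc_serialized_parallel]);
      return 0;
    }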
diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index cf1bd246d2b295..e92447d79feac4 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -888,313 +888,313 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
 ; CHECK: declare dso_local i32 @omp_pause_resource_all(i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
+; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32)
+; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*)
+; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*)
+; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*)
+; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0
 
-; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32)
+; CHECK: ; Function Attrs: nounwind
+; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
#0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 ; CHECK: ; Function Attrs: 
nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* 
@__kmpc_init_allocator(i32, i8*, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) +; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* 
@__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) @@ -1212,52 +1212,52 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 ; OPTIMISTIC: ; 
Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority() @@ -1326,7 +1326,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device() @@ -1356,25 +1356,25 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*) @@ -1419,7 
+1419,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) @@ -1427,7 +1427,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1451,13 +1451,13 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1466,22 +1466,22 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; 
Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1523,10 +1523,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1535,10 +1535,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1598,7 +1598,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1622,7 +1622,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1634,16 +1634,16 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: 
convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
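; Note on the pattern above (an illustrative sketch, not part of the patch): the
; trailing #0/#1/#2 tokens the updated CHECK lines now expect are references to
; LLVM attribute groups, which the IR printer emits once at the end of the module
; and summarizes in the "; Function Attrs:" comment above each declaration.
; Assuming group numbering as in these CHECK lines, the checked IR pairs up
; roughly like this:
;
;   ; Function Attrs: nounwind
;   declare i32 @__kmpc_single(%struct.ident_t*, i32) #0
;
;   attributes #0 = { nounwind }
;
; Dropping `convergent` from a runtime function thus changes both the
; attribute-group contents and the "Function Attrs:" comment that FileCheck
; matches, which is why each stanza updates the two lines together.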