diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 941bc999f0853..3f231703aa651 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -781,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF, emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld); + OMPBuilder.createTargetDeinit(Bld, IsSPMD); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -819,6 +819,24 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, IsInTTDRegion = false; } +// Create a unique global variable to indicate the execution mode of this target +// region. The execution mode is either 'generic', or 'spmd' depending on the +// target directive. This variable is picked up by the offload library to setup +// the device appropriately before kernel launch. If the execution mode is +// 'generic', the runtime reserves one warp for the master, otherwise, all +// warps participate in parallel work. +static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, + bool Mode) { + auto *GVMode = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::WeakAnyLinkage, + llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD + : OMP_TGT_EXEC_MODE_GENERIC), + Twine(Name, "_exec_mode")); + GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility); + CGM.addCompilerUsedGlobal(GVMode); +} + void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -835,6 +853,8 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + + setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) diff --git a/clang/test/Headers/amdgcn_openmp_device_math_c.c b/clang/test/Headers/amdgcn_openmp_device_math_c.c index 96630cfb404e2..e8c3f3c88ce69 100644 --- a/clang/test/Headers/amdgcn_openmp_device_math_c.c +++ b/clang/test/Headers/amdgcn_openmp_device_math_c.c @@ -36,7 +36,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr // CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr // CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_int_l9_kernel_environment to ptr)) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -54,7 +54,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[__SGN_ASCAST_I]], align 4 // CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i32 [[XOR_I]], [[TMP5]] // CHECK-NEXT: store i32 [[SUB_I]], ptr [[L1_ASCAST]], align 4 -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -71,7 +71,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr // CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr // CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_l16_kernel_environment to ptr)) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -89,7 +89,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 // CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]] // CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8 -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -106,7 +106,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr // CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr // CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_long_l23_kernel_environment to ptr)) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -124,7 +124,7 @@ void test_math_long_long(long long x) { // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 // CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]] // CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8 -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index b877e177cb6fb..b1a92db53bda5 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -37,7 +37,7 @@ int test_amdgcn_target_tid_threads_simd() { // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr)) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -61,7 +61,7 @@ int test_amdgcn_target_tid_threads_simd() { // CHECK: worker.exit: // CHECK-NEXT: ret void // CHECK: for.end: -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) // CHECK-NEXT: ret void // // @@ -78,7 +78,7 @@ int test_amdgcn_target_tid_threads_simd() { // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr)) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -109,6 +109,6 @@ int test_amdgcn_target_tid_threads_simd() { // CHECK-NEXT: ret void // CHECK: omp.inner.for.end: // CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4 -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c index 04d175db58bc9..c674a1d6dd5c5 100644 --- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c +++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c @@ -33,11 +33,11 @@ void write_to_aligned_array(int *a, int N) { // CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr // CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_kernel_environment to ptr)) +// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) // CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-AMD: user_code.entry: -// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) // CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4 // CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 @@ -45,7 +45,7 @@ void write_to_aligned_array(int *a, int N) { // CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // CHECK-AMD-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]] -// CHECK-AMD-NEXT: call void @__kmpc_target_deinit() +// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) // CHECK-AMD-NEXT: ret void // CHECK-AMD: worker.exit: // CHECK-AMD-NEXT: ret void diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index 1f245fa8c50ac..edf8aec1faf56 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -31,15 +31,15 @@ int maini1() { // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index 3776d4f55a808..e0ee1dd960b2f 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -281,11 +281,11 @@ int a; // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -295,7 +295,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -703,7 +703,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -711,7 +711,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -895,7 +895,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -903,7 +903,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1078,7 +1078,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1086,7 +1086,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1268,7 +1268,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1276,7 +1276,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1458,7 +1458,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1466,7 +1466,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1648,7 +1648,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1656,7 +1656,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1842,7 +1842,7 @@ int a; // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1854,7 +1854,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2051,7 +2051,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2059,7 +2059,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2229,7 +2229,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2237,7 +2237,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2398,7 +2398,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2406,7 +2406,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2574,7 +2574,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2582,7 +2582,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2750,7 +2750,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2758,7 +2758,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2926,7 +2926,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2934,7 +2934,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3102,7 +3102,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3110,7 +3110,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3286,7 +3286,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3294,7 +3294,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3481,7 +3481,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3489,7 +3489,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3672,7 +3672,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3680,7 +3680,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3862,7 +3862,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3870,7 +3870,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4052,7 +4052,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4060,7 +4060,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4242,7 +4242,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4250,7 +4250,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4432,7 +4432,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4440,7 +4440,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4601,7 +4601,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4609,7 +4609,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4779,7 +4779,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4787,7 +4787,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4948,7 +4948,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4956,7 +4956,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5124,7 +5124,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5132,7 +5132,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5300,7 +5300,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5308,7 +5308,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5476,7 +5476,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5484,7 +5484,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5652,7 +5652,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5660,7 +5660,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5821,7 +5821,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5829,7 +5829,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5999,7 +5999,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6007,7 +6007,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6168,7 +6168,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6176,7 +6176,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6344,7 +6344,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6352,7 +6352,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6520,7 +6520,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6528,7 +6528,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6696,7 +6696,7 @@ int a; // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6704,7 +6704,7 @@ int a; // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6874,7 +6874,7 @@ int a; // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6884,7 +6884,7 @@ int a; // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6971,14 +6971,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7048,14 +7048,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7142,14 +7142,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7215,14 +7215,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7288,14 +7288,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7361,14 +7361,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7437,7 +7437,7 @@ int a; // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -7447,7 +7447,7 @@ int a; // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7542,14 +7542,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7627,14 +7627,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7729,14 +7729,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7810,14 +7810,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7891,14 +7891,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7972,14 +7972,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8053,14 +8053,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8135,14 +8135,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8220,14 +8220,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8322,14 +8322,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8403,14 +8403,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8484,14 +8484,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8565,14 +8565,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8646,14 +8646,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8740,14 +8740,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8817,14 +8817,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8911,14 +8911,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8984,14 +8984,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -9057,14 +9057,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -9130,14 +9130,14 @@ int a; // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -9208,11 +9208,11 @@ int a; // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -9222,7 +9222,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9618,7 +9618,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9626,7 +9626,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9806,7 +9806,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9814,7 +9814,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9984,7 +9984,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9992,7 +9992,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10170,7 +10170,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10178,7 +10178,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10356,7 +10356,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10364,7 +10364,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10542,7 +10542,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10550,7 +10550,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10731,7 +10731,7 @@ int a; // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10742,7 +10742,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10931,7 +10931,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10939,7 +10939,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11105,7 +11105,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11113,7 +11113,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11269,7 +11269,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11277,7 +11277,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11441,7 +11441,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11449,7 +11449,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11613,7 +11613,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11621,7 +11621,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11785,7 +11785,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11793,7 +11793,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11957,7 +11957,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11965,7 +11965,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12136,7 +12136,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12144,7 +12144,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12327,7 +12327,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12335,7 +12335,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12513,7 +12513,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12521,7 +12521,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12699,7 +12699,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12707,7 +12707,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12885,7 +12885,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12893,7 +12893,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13071,7 +13071,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13079,7 +13079,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13257,7 +13257,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13265,7 +13265,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13421,7 +13421,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13429,7 +13429,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13595,7 +13595,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13603,7 +13603,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13759,7 +13759,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13767,7 +13767,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13931,7 +13931,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13939,7 +13939,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14103,7 +14103,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14111,7 +14111,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14275,7 +14275,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14283,7 +14283,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14447,7 +14447,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14455,7 +14455,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14611,7 +14611,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14619,7 +14619,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14785,7 +14785,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14793,7 +14793,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14949,7 +14949,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14957,7 +14957,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15121,7 +15121,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15129,7 +15129,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15293,7 +15293,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15301,7 +15301,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15465,7 +15465,7 @@ int a; // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15473,7 +15473,7 @@ int a; // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15639,7 +15639,7 @@ int a; // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15649,7 +15649,7 @@ int a; // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15736,14 +15736,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15813,14 +15813,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15907,14 +15907,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15980,14 +15980,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16053,14 +16053,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16126,14 +16126,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16202,7 +16202,7 @@ int a; // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -16212,7 +16212,7 @@ int a; // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16307,14 +16307,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16392,14 +16392,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16494,14 +16494,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16575,14 +16575,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16656,14 +16656,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16737,14 +16737,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16818,14 +16818,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16900,14 +16900,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16985,14 +16985,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17087,14 +17087,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17168,14 +17168,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17249,14 +17249,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17330,14 +17330,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17411,14 +17411,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17505,14 +17505,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17582,14 +17582,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17676,14 +17676,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17749,14 +17749,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17822,14 +17822,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17895,14 +17895,14 @@ int a; // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17973,11 +17973,11 @@ int a; // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -17987,7 +17987,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18383,7 +18383,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18391,7 +18391,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18571,7 +18571,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18579,7 +18579,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18749,7 +18749,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18757,7 +18757,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18935,7 +18935,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18943,7 +18943,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19121,7 +19121,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19129,7 +19129,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19307,7 +19307,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19315,7 +19315,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19496,7 +19496,7 @@ int a; // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19507,7 +19507,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19696,7 +19696,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19704,7 +19704,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19870,7 +19870,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19878,7 +19878,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20034,7 +20034,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20042,7 +20042,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20206,7 +20206,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20214,7 +20214,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20378,7 +20378,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20386,7 +20386,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20550,7 +20550,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20558,7 +20558,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20722,7 +20722,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20730,7 +20730,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20901,7 +20901,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20909,7 +20909,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21092,7 +21092,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21100,7 +21100,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21278,7 +21278,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21286,7 +21286,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21464,7 +21464,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21472,7 +21472,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21650,7 +21650,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21658,7 +21658,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21836,7 +21836,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21844,7 +21844,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22022,7 +22022,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22030,7 +22030,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22186,7 +22186,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22194,7 +22194,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22360,7 +22360,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22368,7 +22368,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22524,7 +22524,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22532,7 +22532,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22696,7 +22696,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22704,7 +22704,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22868,7 +22868,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22876,7 +22876,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23040,7 +23040,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23048,7 +23048,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23212,7 +23212,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23220,7 +23220,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23376,7 +23376,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23384,7 +23384,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23550,7 +23550,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23558,7 +23558,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23714,7 +23714,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23722,7 +23722,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23886,7 +23886,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23894,7 +23894,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24058,7 +24058,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24066,7 +24066,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24230,7 +24230,7 @@ int a; // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24238,7 +24238,7 @@ int a; // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24404,7 +24404,7 @@ int a; // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24414,7 +24414,7 @@ int a; // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24501,14 +24501,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24578,14 +24578,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24672,14 +24672,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24745,14 +24745,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24818,14 +24818,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24891,14 +24891,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24967,7 +24967,7 @@ int a; // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24977,7 +24977,7 @@ int a; // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined to i8*), i8* null, i8** [[TMP4]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25072,14 +25072,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25157,14 +25157,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25259,14 +25259,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25340,14 +25340,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25421,14 +25421,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25502,14 +25502,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25583,14 +25583,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25665,14 +25665,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25750,14 +25750,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25852,14 +25852,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25933,14 +25933,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26014,14 +26014,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26095,14 +26095,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26176,14 +26176,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26270,14 +26270,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26347,14 +26347,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26441,14 +26441,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26514,14 +26514,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26587,14 +26587,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26660,14 +26660,14 @@ int a; // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index efb14dec4c5c0..f81fe7ef0ec79 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -36,13 +36,13 @@ void test_ds(){ // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_kernel_environment) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: store i32 10, ptr [[A]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 @@ -56,7 +56,7 @@ void test_ds(){ // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 2d25cf48b8962..dab378ac1a03e 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -44,18 +44,18 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment) +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit() +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -360,18 +360,18 @@ int main(int argc, char **argv) { // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment) +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: call void @__kmpc_target_deinit() +// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp index 33797166afd1d..ad1b349caf6c8 100644 --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -29,13 +29,13 @@ int main() { // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -64,12 +64,12 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -105,13 +105,13 @@ int main() { // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -140,12 +140,12 @@ int main() { // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK2-SAME: () #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp index c2b64ec8b7f6e..7bc970171c1be 100644 --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -40,16 +40,16 @@ int main() { // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -150,16 +150,16 @@ int main() { // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 6998c7229696d..5ce8a714710c5 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -81,18 +81,18 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0) // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,7 +192,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -213,7 +213,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -255,7 +255,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -270,7 +270,7 @@ int bar(int n){ // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -344,18 +344,18 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0) // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -455,7 +455,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -476,7 +476,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -518,7 +518,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -533,7 +533,7 @@ int bar(int n){ // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp index c56bde29844e9..7d15711b04efc 100644 --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -41,12 +41,12 @@ int bar(int n){ // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_kernel_environment) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 @@ -59,7 +59,7 @@ int bar(int n){ // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 66be88b46037f..00d19a6f2c52e 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -153,17 +153,17 @@ void unreachable_call() { // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,11 +192,11 @@ void unreachable_call() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -207,7 +207,7 @@ void unreachable_call() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -221,7 +221,7 @@ void unreachable_call() { // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 // CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -256,7 +256,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -300,7 +300,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 // CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -330,7 +330,7 @@ void unreachable_call() { // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -351,7 +351,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -374,7 +374,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -397,7 +397,7 @@ void unreachable_call() { // CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -426,7 +426,7 @@ void unreachable_call() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -435,7 +435,7 @@ void unreachable_call() { // CHECK1: worker.exit: // CHECK1-NEXT: ret void // CHECK1: 1: -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // // @@ -449,7 +449,7 @@ void unreachable_call() { // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -465,7 +465,7 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -523,17 +523,17 @@ void unreachable_call() { // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -562,11 +562,11 @@ void unreachable_call() { // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK2-SAME: () #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -577,7 +577,7 @@ void unreachable_call() { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -591,7 +591,7 @@ void unreachable_call() { // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 // CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -626,7 +626,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -670,7 +670,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -700,7 +700,7 @@ void unreachable_call() { // CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -721,7 +721,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -744,7 +744,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -767,7 +767,7 @@ void unreachable_call() { // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -796,7 +796,7 @@ void unreachable_call() { // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK2-SAME: () #[[ATTR4]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -805,7 +805,7 @@ void unreachable_call() { // CHECK2: worker.exit: // CHECK2-NEXT: ret void // CHECK2: 1: -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // // @@ -819,7 +819,7 @@ void unreachable_call() { // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -835,7 +835,7 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index 3ee0b70f56bc0..6d168f4674027 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -58,15 +58,15 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -103,7 +103,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -115,7 +115,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -159,15 +159,15 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,7 +204,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -216,7 +216,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index d525d2c931ef0..847a276b5644f 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -53,15 +53,15 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -100,7 +100,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -113,7 +113,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -157,15 +157,15 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,7 +204,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -217,7 +217,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 2a437e7b9af97..6d1635b9c4d3d 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -52,17 +52,141 @@ int bar(int n){ } #endif +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -85,7 +209,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -97,7 +221,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -133,7 +257,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -153,7 +277,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -192,13 +316,13 @@ int bar(int n){ // CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -221,7 +345,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -233,7 +357,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -269,7 +393,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -289,7 +413,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -328,13 +452,13 @@ int bar(int n){ // CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -357,7 +481,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -369,7 +493,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -405,7 +529,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -425,7 +549,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -464,13 +588,13 @@ int bar(int n){ // CHECK-64-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -493,7 +617,7 @@ int bar(int n){ // CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -505,7 +629,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -541,7 +665,7 @@ int bar(int n){ // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -561,7 +685,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -600,13 +724,13 @@ int bar(int n){ // CHECK-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -629,7 +753,7 @@ int bar(int n){ // CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -641,7 +765,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -677,7 +801,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -697,7 +821,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -736,13 +860,13 @@ int bar(int n){ // CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -765,7 +889,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -777,7 +901,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -813,7 +937,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -833,7 +957,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index 27fcf8cebae75..e94befaa0aa65 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -104,17 +104,17 @@ int bar(int n){ // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-64-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* // CHECK-64-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK-64-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -303,7 +303,7 @@ int bar(int n){ // CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -315,7 +315,7 @@ int bar(int n){ // CHECK-64-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 // CHECK-64-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -559,7 +559,7 @@ int bar(int n){ // CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -572,7 +572,7 @@ int bar(int n){ // CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 // CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -836,17 +836,17 @@ int bar(int n){ // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK-32-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* // CHECK-32-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK-32-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -1035,7 +1035,7 @@ int bar(int n){ // CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1047,7 +1047,7 @@ int bar(int n){ // CHECK-32-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 // CHECK-32-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -1291,7 +1291,7 @@ int bar(int n){ // CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1304,7 +1304,7 @@ int bar(int n){ // CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -1568,17 +1568,17 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* // CHECK-32-EX-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -1767,7 +1767,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1779,7 +1779,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -2023,7 +2023,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -2036,7 +2036,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 1b93527b4f033..7846c3472de26 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -35,15 +35,15 @@ void test() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -527,7 +527,7 @@ void test() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -535,7 +535,7 @@ void test() { // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c index 38bd906a1be41..f4b6be40c73c3 100644 --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -45,7 +45,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -58,7 +58,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -67,12 +67,12 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -84,7 +84,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -99,7 +99,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // CHECK-64: if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // // @@ -108,7 +108,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -121,7 +121,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -130,12 +130,12 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -147,7 +147,7 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -162,6 +162,6 @@ void CheckAllocaIsInEntryBlock(void) { // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // CHECK-32: if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index 2d3dadbdabf5a..4dacc010267ef 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -72,7 +72,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -124,7 +124,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-64-NEXT: br label [[SIMD_IF_END]] // CHECK45-64: simd.if.end: -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // // @@ -142,7 +142,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -198,7 +198,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-64-NEXT: br label [[SIMD_IF_END]] // CHECK45-64: simd.if.end: -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // // @@ -211,7 +211,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -244,7 +244,7 @@ int bar(int n){ // CHECK45-64-NEXT: ret void // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // // @@ -261,7 +261,7 @@ int bar(int n){ // CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -299,7 +299,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // // @@ -317,7 +317,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -368,7 +368,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-32-NEXT: br label [[SIMD_IF_END]] // CHECK45-32: simd.if.end: -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // // @@ -386,7 +386,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -441,7 +441,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-32-NEXT: br label [[SIMD_IF_END]] // CHECK45-32: simd.if.end: -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // // @@ -454,7 +454,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -486,7 +486,7 @@ int bar(int n){ // CHECK45-32-NEXT: ret void // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // // @@ -503,7 +503,7 @@ int bar(int n){ // CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -540,7 +540,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // // @@ -558,7 +558,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -609,7 +609,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK45-32-EX: simd.if.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // // @@ -627,7 +627,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -682,7 +682,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK45-32-EX: simd.if.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // // @@ -695,7 +695,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -727,7 +727,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // // @@ -744,7 +744,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -781,7 +781,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // // @@ -799,7 +799,7 @@ int bar(int n){ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -851,7 +851,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-64-NEXT: br label [[SIMD_IF_END]] // CHECK-64: simd.if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // // @@ -869,7 +869,7 @@ int bar(int n){ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -925,7 +925,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-64-NEXT: br label [[SIMD_IF_END]] // CHECK-64: simd.if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // // @@ -938,7 +938,7 @@ int bar(int n){ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -971,7 +971,7 @@ int bar(int n){ // CHECK-64-NEXT: ret void // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // // @@ -988,7 +988,7 @@ int bar(int n){ // CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1026,7 +1026,7 @@ int bar(int n){ // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // // @@ -1044,7 +1044,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1095,7 +1095,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-32-NEXT: br label [[SIMD_IF_END]] // CHECK-32: simd.if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // // @@ -1113,7 +1113,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1168,7 +1168,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-32-NEXT: br label [[SIMD_IF_END]] // CHECK-32: simd.if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // // @@ -1181,7 +1181,7 @@ int bar(int n){ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1213,7 +1213,7 @@ int bar(int n){ // CHECK-32-NEXT: ret void // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // // @@ -1230,7 +1230,7 @@ int bar(int n){ // CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1267,7 +1267,7 @@ int bar(int n){ // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // // @@ -1285,7 +1285,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1336,7 +1336,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK-32-EX: simd.if.end: -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // // @@ -1354,7 +1354,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1409,7 +1409,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK-32-EX: simd.if.end: -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // // @@ -1422,7 +1422,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1454,7 +1454,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // // @@ -1471,7 +1471,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1508,6 +1508,6 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index ada0b280714e5..2e147ce35a07a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -57,18 +57,18 @@ int bar(int n){ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -95,7 +95,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -106,7 +106,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -133,7 +133,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -144,7 +144,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -209,18 +209,18 @@ int bar(int n){ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -247,7 +247,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -258,7 +258,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -285,7 +285,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -296,7 +296,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp index 8e0f369ba86d0..c9434b5df2e57 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -37,15 +37,15 @@ int bar(int n){ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -155,15 +155,15 @@ int bar(int n){ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 7f2b6374faf52..ae7af8ef4ad1a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -85,11 +85,11 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -99,7 +99,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -396,7 +396,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -407,7 +407,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -645,7 +645,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -653,7 +653,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -833,7 +833,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -844,7 +844,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1059,7 +1059,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1070,7 +1070,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1371,7 +1371,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1383,7 +1383,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1635,11 +1635,11 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1649,7 +1649,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1946,7 +1946,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1957,7 +1957,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2195,7 +2195,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2203,7 +2203,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2383,7 +2383,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2394,7 +2394,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2609,7 +2609,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2620,7 +2620,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2916,7 +2916,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2928,7 +2928,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -3180,11 +3180,11 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -3194,7 +3194,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3484,7 +3484,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3495,7 +3495,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3727,7 +3727,7 @@ int bar(int n){ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3735,7 +3735,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3909,7 +3909,7 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3920,7 +3920,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4128,7 +4128,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -4139,7 +4139,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4443,7 +4443,7 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -4455,7 +4455,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 86c22666b5283..45b81cadffa34 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -43,11 +43,11 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 @@ -57,7 +57,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -341,11 +341,11 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 @@ -355,7 +355,7 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index e17564eae02d8..adf215c5e9e13 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -73,11 +73,11 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -87,7 +87,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -408,7 +408,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -419,7 +419,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -681,7 +681,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -689,7 +689,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -883,7 +883,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -894,7 +894,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1128,11 +1128,11 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -1142,7 +1142,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1456,7 +1456,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1467,7 +1467,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1723,7 +1723,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1731,7 +1731,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1919,7 +1919,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1930,7 +1930,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index 1075ede87bcb7..7d9e22dcacdfc 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -81,11 +81,11 @@ int bar(int n){ // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: -// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -95,7 +95,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -246,7 +246,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -257,7 +257,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -396,7 +396,7 @@ int bar(int n){ // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -404,7 +404,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -515,7 +515,7 @@ int bar(int n){ // CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -526,7 +526,7 @@ int bar(int n){ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit() +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -664,11 +664,11 @@ int bar(int n){ // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: -// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -678,7 +678,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -828,7 +828,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -839,7 +839,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -977,7 +977,7 @@ int bar(int n){ // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -985,7 +985,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -1095,7 +1095,7 @@ int bar(int n){ // CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -1106,7 +1106,7 @@ int bar(int n){ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -1242,11 +1242,11 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -1256,7 +1256,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1406,7 +1406,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1417,7 +1417,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1555,7 +1555,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1563,7 +1563,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1673,7 +1673,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1684,7 +1684,7 @@ int bar(int n){ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1820,11 +1820,11 @@ int bar(int n){ // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1834,7 +1834,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1985,7 +1985,7 @@ int bar(int n){ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1996,7 +1996,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2135,7 +2135,7 @@ int bar(int n){ // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2143,7 +2143,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2254,7 +2254,7 @@ int bar(int n){ // CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2265,7 +2265,7 @@ int bar(int n){ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit() +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2403,11 +2403,11 @@ int bar(int n){ // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -2417,7 +2417,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2567,7 +2567,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2578,7 +2578,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2716,7 +2716,7 @@ int bar(int n){ // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2724,7 +2724,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2834,7 +2834,7 @@ int bar(int n){ // CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2845,7 +2845,7 @@ int bar(int n){ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2981,11 +2981,11 @@ int bar(int n){ // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -2995,7 +2995,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3145,7 +3145,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3156,7 +3156,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3294,7 +3294,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3302,7 +3302,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3412,7 +3412,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3423,7 +3423,7 @@ int bar(int n){ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index 0f42623c013b5..933744643d9e8 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -83,19 +83,19 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -122,7 +122,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -134,7 +134,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -161,19 +161,19 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -200,7 +200,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -212,7 +212,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -243,19 +243,19 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -286,7 +286,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -298,7 +298,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -329,19 +329,19 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit() +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -372,7 +372,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -384,7 +384,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit() +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index 9cbe9b0fc2dc1..6b804a8ac1130 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -57,19 +57,19 @@ int bar(int n){ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8 // CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -328,7 +328,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -344,7 +344,7 @@ int bar(int n){ // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -677,7 +677,7 @@ int bar(int n){ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -685,7 +685,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_target_deinit() +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1250,17 +1250,17 @@ int bar(int n){ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1519,7 +1519,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1535,7 +1535,7 @@ int bar(int n){ // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1868,7 +1868,7 @@ int bar(int n){ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1876,7 +1876,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK2-NEXT: call void @__kmpc_target_deinit() +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2441,17 +2441,17 @@ int bar(int n){ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2710,7 +2710,7 @@ int bar(int n){ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -2726,7 +2726,7 @@ int bar(int n){ // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3059,7 +3059,7 @@ int bar(int n){ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3067,7 +3067,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp index 418c14a549b06..0fbb708f1bd42 100644 --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -103,16 +103,16 @@ int main() // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-NEXT: call void @__kmpc_target_deinit() +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c index f95a7278de683..4e4a3c8f08835 100644 --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -84,9 +84,9 @@ void spmd(void) { } #pragma omp begin declare target device_type(nohost) -struct KernelEnvironmentTy; -__attribute__((weak)) -extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} +__attribute__((weak)) +extern "C" int __kmpc_target_init(void *Ident, char Mode, + bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c index 1180ca8c2dcd0..7be98a26d9a18 100644 --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -40,9 +40,9 @@ void spmd(void) { } #pragma omp begin declare target device_type(nohost) -struct KernelEnvironmentTy; -__attribute__((weak)) -extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} +__attribute__((weak)) +extern "C" int __kmpc_target_init(void *Ident, char Mode, + bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 6b50f99d9275c..1cbc447b902e9 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -95,7 +95,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment), !dbg [[DBG47]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: @@ -113,7 +113,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] @@ -308,11 +308,11 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment), !dbg [[DBG137]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]] @@ -325,8 +325,8 @@ int main() { // CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG137]] @@ -517,11 +517,11 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment), !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] @@ -530,8 +530,8 @@ int main() { // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] // CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG212]] diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 32d1a96d5e63a..32e8e590f91fd 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -89,7 +89,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: @@ -110,7 +110,7 @@ int main() { // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] // CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] @@ -383,11 +383,11 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] @@ -400,8 +400,8 @@ int main() { // CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG146]] @@ -466,7 +466,7 @@ int main() { // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] @@ -559,7 +559,7 @@ int main() { // CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] // // @@ -665,11 +665,11 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] @@ -678,8 +678,8 @@ int main() { // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] // CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG236]] @@ -749,7 +749,7 @@ int main() { // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] @@ -846,7 +846,7 @@ int main() { // CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 33ce25566c541..f9ccad0a1d18c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1721,7 +1721,8 @@ class OpenMPIRBuilder { /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. - void createTargetDeinit(const LocationDescription &Loc); + /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. + void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); ///} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 30beed816a175..8a09fb7cb7a6f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -96,11 +96,6 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr) __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8) -__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false, - Int8, Int8, Int8) -__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16) -__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false, - ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr) #undef __OMP_STRUCT_TYPE #undef OMP_STRUCT_TYPE @@ -457,8 +452,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr) -__OMP_RTL(__kmpc_target_deinit, false, Void,) +__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) +__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -1017,9 +1012,9 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt, - ParamAttrs(AttributeSet())) + ParamAttrs(AttributeSet(), SExt, SExt)) __OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(), - ParamAttrs()) + ParamAttrs(AttributeSet(), SExt)) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, AttributeSet(), AttributeSet(), AttributeSet(), diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 65aa119b81b96..cf329e40f5a44 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -3899,60 +3898,14 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) { ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned( - IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD); - ConstantInt *MayUseNestedParallelismVal = - ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true); - ConstantInt *DebugIndentionLevelVal = - ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0); - - // We need to strip the debug prefix to get the correct kernel name. - Function *Kernel = Builder.GetInsertBlock()->getParent(); - StringRef KernelName = Kernel->getName(); - const std::string DebugPrefix = "_debug__"; - if (KernelName.ends_with(DebugPrefix)) - KernelName = KernelName.drop_back(DebugPrefix.length()); + ConstantInt *UseGenericStateMachine = + ConstantInt::getBool(Int32->getContext(), !IsSPMD); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); - const DataLayout &DL = Fn->getParent()->getDataLayout(); - - Twine DynamicEnvironmentName = KernelName + "_dynamic_environment"; - Constant *DynamicEnvironmentInitializer = - ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal}); - Constant *DynamicEnvironmentGV = new GlobalVariable( - M, DynamicEnvironment, /* IsConstant */ false, - GlobalValue::InternalLinkage, DynamicEnvironmentInitializer, - DynamicEnvironmentName, - /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, - DL.getDefaultGlobalsAddressSpace()); - if (DynamicEnvironmentGV->getType() != DynamicEnvironmentPtr) - DynamicEnvironmentGV = ConstantExpr::getAddrSpaceCast( - DynamicEnvironmentGV, DynamicEnvironmentPtr); - - Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get( - ConfigurationEnvironment, { - UseGenericStateMachineVal, - MayUseNestedParallelismVal, - IsSPMDVal, - }); - Constant *KernelEnvironmentInitializer = ConstantStruct::get( - KernelEnvironment, { - ConfigurationEnvironmentInitializer, - Ident, - DynamicEnvironmentGV, - }); - Twine KernelEnvironmentName = KernelName + "_kernel_environment"; - Constant *KernelEnvironmentGV = new GlobalVariable( - M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage, - KernelEnvironmentInitializer, KernelEnvironmentName, - /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, - DL.getDefaultGlobalsAddressSpace()); - if (KernelEnvironmentGV->getType() != KernelEnvironmentPtr) - KernelEnvironmentGV = ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV, - KernelEnvironmentPtr); - - CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV}); + + CallInst *ThreadKind = Builder.CreateCall( + Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3985,14 +3938,22 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) { return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); } -void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) { +void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, + bool IsSPMD) { if (!updateToLocation(Loc)) return; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + ConstantInt *IsSPMDVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), + IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); + Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {}); + Builder.CreateCall(Fn, {Ident, IsSPMDVal}); } void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index dd34e0ef07a77..68846c6a44431 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -31,7 +31,6 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" -#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" @@ -179,80 +178,6 @@ STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated"); static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif -namespace KernelInfo { - -// struct ConfigurationEnvironmentTy { -// uint8_t UseGenericStateMachine; -// uint8_t MayUseNestedParallelism; -// llvm::omp::OMPTgtExecModeFlags ExecMode; -// }; - -// struct DynamicEnvironmentTy { -// uint16_t DebugIndentionLevel; -// }; - -// struct KernelEnvironmentTy { -// ConfigurationEnvironmentTy Configuration; -// IdentTy *Ident; -// DynamicEnvironmentTy *DynamicEnv; -// }; - -#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \ - constexpr const unsigned MEMBER##Idx = IDX; - -KERNEL_ENVIRONMENT_IDX(Configuration, 0) -KERNEL_ENVIRONMENT_IDX(Ident, 1) - -#undef KERNEL_ENVIRONMENT_IDX - -#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \ - constexpr const unsigned MEMBER##Idx = IDX; - -KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0) -KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) -KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2) - -#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX - -#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \ - RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \ - return cast(KernelEnvC->getAggregateElement(MEMBER##Idx)); \ - } - -KERNEL_ENVIRONMENT_GETTER(Ident, Constant) -KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct) - -#undef KERNEL_ENVIRONMENT_GETTER - -#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \ - ConstantInt *get##MEMBER##FromKernelEnvironment( \ - ConstantStruct *KernelEnvC) { \ - ConstantStruct *ConfigC = \ - getConfigurationFromKernelEnvironment(KernelEnvC); \ - return dyn_cast(ConfigC->getAggregateElement(MEMBER##Idx)); \ - } - -KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine) -KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism) -KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode) - -#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER - -GlobalVariable * -getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) { - constexpr const int InitKernelEnvironmentArgNo = 0; - return cast( - KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo) - ->stripPointerCasts()); -} - -ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) { - GlobalVariable *KernelEnvGV = - getKernelEnvironementGVFromKernelInitCB(KernelInitCB); - return cast(KernelEnvGV->getInitializer()); -} -} // namespace KernelInfo - namespace { struct AAHeapToShared; @@ -685,10 +610,6 @@ struct KernelInfoState : AbstractState { /// one we abort as the kernel is malformed. CallBase *KernelInitCB = nullptr; - /// The constant kernel environement as taken from and passed to - /// __kmpc_target_init. - ConstantStruct *KernelEnvC = nullptr; - /// The __kmpc_target_deinit call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; @@ -793,12 +714,6 @@ struct KernelInfoState : AbstractState { "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } - if (KIS.KernelEnvC) { - if (KernelEnvC && KernelEnvC != KIS.KernelEnvC) - llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " - "assumptions."); - KernelEnvC = KIS.KernelEnvC; - } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; @@ -2865,11 +2780,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; - ConstantStruct *KernelEnvC = - KernelInfo::getKernelEnvironementFromKernelInitCB(CB); - ConstantInt *ExecModeC = - KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); - return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC; + const int InitModeArgNo = 1; + auto *ModeCI = dyn_cast(CB->getOperand(InitModeArgNo)); + return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); } if (C->isZero()) { @@ -3556,29 +3469,6 @@ struct AAKernelInfoFunction : AAKernelInfo { return GuardedInstructions; } - void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) { - Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction( - KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx}); - assert(NewKernelEnvC && "Failed to create new kernel environment"); - KernelEnvC = cast(NewKernelEnvC); - } - -#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \ - void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \ - ConstantStruct *ConfigC = \ - KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \ - Constant *NewConfigC = ConstantFoldInsertValueInstruction( \ - ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \ - assert(NewConfigC && "Failed to create new configuration environment"); \ - setConfigurationOfKernelEnvironment(cast(NewConfigC)); \ - } - - KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine) - KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism) - KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode) - -#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER - /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // This is a high-level transform that might change the constant arguments @@ -3627,52 +3517,61 @@ struct AAKernelInfoFunction : AAKernelInfo { ReachingKernelEntries.insert(Fn); IsKernelEntry = true; - KernelEnvC = - KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); - GlobalVariable *KernelEnvGV = - KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); + // For kernels we might need to initialize/finalize the IsSPMD state and + // we need to register a simplification callback so that the Attributor + // knows the constant arguments to __kmpc_target_init and + // __kmpc_target_deinit might actually change. - Attributor::GlobalVariableSimplifictionCallbackTy - KernelConfigurationSimplifyCB = - [&](const GlobalVariable &GV, const AbstractAttribute *AA, - bool &UsedAssumedInformation) -> std::optional { - return KernelEnvC; + Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = + [&](const IRPosition &IRP, const AbstractAttribute *AA, + bool &UsedAssumedInformation) -> std::optional { + return nullptr; + }; + + Attributor::SimplifictionCallbackTy ModeSimplifyCB = + [&](const IRPosition &IRP, const AbstractAttribute *AA, + bool &UsedAssumedInformation) -> std::optional { + // IRP represents the "SPMDCompatibilityTracker" argument of an + // __kmpc_target_init or + // __kmpc_target_deinit call. We will answer this one with the internal + // state. + if (!SPMDCompatibilityTracker.isValidState()) + return nullptr; + if (!SPMDCompatibilityTracker.isAtFixpoint()) { + if (AA) + A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); + UsedAssumedInformation = true; + } else { + UsedAssumedInformation = false; + } + auto *Val = ConstantInt::getSigned( + IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), + SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD + : OMP_TGT_EXEC_MODE_GENERIC); + return Val; }; - A.registerGlobalVariableSimplificationCallback( - *KernelEnvGV, KernelConfigurationSimplifyCB); + constexpr const int InitModeArgNo = 1; + constexpr const int DeinitModeArgNo = 1; + constexpr const int InitUseStateMachineArgNo = 2; + A.registerSimplificationCallback( + IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), + StateMachineSimplifyCB); + A.registerSimplificationCallback( + IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), + ModeSimplifyCB); + A.registerSimplificationCallback( + IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), + ModeSimplifyCB); // Check if we know we are in SPMD-mode already. - ConstantInt *ExecModeC = - KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); - ConstantInt *AssumedExecModeC = ConstantInt::get( - ExecModeC->getType(), - ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); - if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) + ConstantInt *ModeArg = + dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); + if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); + // This is a generic region but SPMDization is disabled so stop tracking. else if (DisableOpenMPOptSPMDization) - // This is a generic region but SPMDization is disabled so stop - // tracking. SPMDCompatibilityTracker.indicatePessimisticFixpoint(); - else - setExecModeOfKernelEnvironment(AssumedExecModeC); - - ConstantInt *MayUseNestedParallelismC = - KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); - ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), NestedParallelism); - setMayUseNestedParallelismOfKernelEnvironment( - AssumedMayUseNestedParallelismC); - - if (!DisableOpenMPOptStateMachineRewrite) { - ConstantInt *UseGenericStateMachineC = - KernelInfo::getUseGenericStateMachineFromKernelEnvironment( - KernelEnvC); - ConstantInt *AssumedUseGenericStateMachineC = - ConstantInt::get(UseGenericStateMachineC->getType(), false); - setUseGenericStateMachineOfKernelEnvironment( - AssumedUseGenericStateMachineC); - } // Register virtual uses of functions we might need to preserve. auto RegisterVirtualUse = [&](RuntimeFunction RFKind, @@ -3773,21 +3672,21 @@ struct AAKernelInfoFunction : AAKernelInfo { if (!KernelInitCB || !KernelDeinitCB) return ChangeStatus::UNCHANGED; - ChangeStatus Changed = ChangeStatus::UNCHANGED; + /// Insert nested Parallelism global variable + Function *Kernel = getAnchorScope(); + Module &M = *Kernel->getParent(); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + new GlobalVariable(M, Int8Ty, /* isConstant */ true, + GlobalValue::WeakAnyLinkage, + ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0), + Kernel->getName() + "_nested_parallelism"); // If we can we change the execution mode to SPMD-mode otherwise we build a // custom state machine. + ChangeStatus Changed = ChangeStatus::UNCHANGED; if (!changeToSPMDMode(A, Changed)) { if (!KernelInitCB->getCalledFunction()->isDeclaration()) - Changed |= buildCustomStateMachine(A); - } - - // At last, update the KernelEnvc - GlobalVariable *KernelEnvGV = - KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); - if (KernelEnvGV->getInitializer() != KernelEnvC) { - KernelEnvGV->setInitializer(KernelEnvC); - Changed = ChangeStatus::CHANGED; + return buildCustomStateMachine(A); } return Changed; @@ -3857,14 +3756,14 @@ struct AAKernelInfoFunction : AAKernelInfo { // Find escaping outputs from the guarded region to outside users and // broadcast their values to them. for (Instruction &I : *RegionStartBB) { - SmallVector OutsideUses; - for (Use &U : I.uses()) { - Instruction &UsrI = *cast(U.getUser()); + SmallPtrSet OutsideUsers; + for (User *Usr : I.users()) { + Instruction &UsrI = *cast(Usr); if (UsrI.getParent() != RegionStartBB) - OutsideUses.push_back(&U); + OutsideUsers.insert(&UsrI); } - if (OutsideUses.empty()) + if (OutsideUsers.empty()) continue; HasBroadcastValues = true; @@ -3887,8 +3786,8 @@ struct AAKernelInfoFunction : AAKernelInfo { RegionBarrierBB->getTerminator()); // Emit a load instruction and replace uses of the output value. - for (Use *U : OutsideUses) - A.changeUseAfterManifest(*U, *LoadI); + for (Instruction *UsrI : OutsideUsers) + UsrI->replaceUsesOfWith(&I, LoadI); } auto &OMPInfoCache = static_cast(A.getInfoCache()); @@ -4115,11 +4014,16 @@ struct AAKernelInfoFunction : AAKernelInfo { assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!"); // Check if the kernel is already in SPMD mode, if so, return success. - ConstantStruct *ExistingKernelEnvC = - KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); - auto *ExecModeC = - KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); - const int8_t ExecModeVal = ExecModeC->getSExtValue(); + GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( + (Kernel->getName() + "_exec_mode").str()); + assert(ExecMode && "Kernel without exec mode?"); + assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); + + // Set the global exec mode flag to indicate SPMD-Generic mode. + assert(isa(ExecMode->getInitializer()) && + "ExecMode is not an integer!"); + const int8_t ExecModeVal = + cast(ExecMode->getInitializer())->getSExtValue(); if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) return true; @@ -4137,8 +4041,27 @@ struct AAKernelInfoFunction : AAKernelInfo { // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); - setExecModeOfKernelEnvironment(ConstantInt::get( - ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); + ExecMode->setInitializer( + ConstantInt::get(ExecMode->getInitializer()->getType(), + ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); + + // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. + const int InitModeArgNo = 1; + const int DeinitModeArgNo = 1; + const int InitUseStateMachineArgNo = 2; + + auto &Ctx = getAnchorValue().getContext(); + A.changeUseAfterManifest( + KernelInitCB->getArgOperandUse(InitModeArgNo), + *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), + OMP_TGT_EXEC_MODE_SPMD)); + A.changeUseAfterManifest( + KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), + *ConstantInt::getBool(Ctx, false)); + A.changeUseAfterManifest( + KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), + *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), + OMP_TGT_EXEC_MODE_SPMD)); ++NumOpenMPTargetRegionKernelsSPMD; @@ -4165,29 +4088,30 @@ struct AAKernelInfoFunction : AAKernelInfo { OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) return ChangeStatus::UNCHANGED; - ConstantStruct *ExistingKernelEnvC = - KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); + const int InitModeArgNo = 1; + const int InitUseStateMachineArgNo = 2; // Check if the current configuration is non-SPMD and generic state machine. // If we already have SPMD mode or a custom state machine we do not need to // go any further. If it is anything but a constant something is weird and // we give up. - ConstantInt *UseStateMachineC = - KernelInfo::getUseGenericStateMachineFromKernelEnvironment( - ExistingKernelEnvC); - ConstantInt *ModeC = - KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); + ConstantInt *UseStateMachine = dyn_cast( + KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); + ConstantInt *Mode = + dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); // If we are stuck with generic mode, try to create a custom device (=GPU) // state machine which is specialized for the parallel regions that are // reachable by the kernel. - if (UseStateMachineC->isZero() || - (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + if (!UseStateMachine || UseStateMachine->isZero() || !Mode || + (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) return ChangeStatus::UNCHANGED; // If not SPMD mode, indicate we use a custom state machine now. - setUseGenericStateMachineOfKernelEnvironment( - ConstantInt::get(UseStateMachineC->getType(), false)); + auto &Ctx = getAnchorValue().getContext(); + auto *FalseVal = ConstantInt::getBool(Ctx, false); + A.changeUseAfterManifest( + KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); // If we don't actually need a state machine we are done here. This can // happen if there simply are no parallel regions. In the resulting kernel @@ -4266,7 +4190,6 @@ struct AAKernelInfoFunction : AAKernelInfo { // UserCodeEntryBB: // user code // __kmpc_target_deinit(...) // - auto &Ctx = getAnchorValue().getContext(); Function *Kernel = getAssociatedFunction(); assert(Kernel && "Expected an associated function!"); @@ -4349,7 +4272,7 @@ struct AAKernelInfoFunction : AAKernelInfo { StateMachineBeginBB->end()), DLoc)); - Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC); + Value *Ident = KernelInitCB->getArgOperand(0); Value *GTid = KernelInitCB; FunctionCallee BarrierFn = @@ -4479,46 +4402,6 @@ struct AAKernelInfoFunction : AAKernelInfo { ChangeStatus updateImpl(Attributor &A) override { KernelInfoState StateBefore = getState(); - // When we leave this function this RAII will make sure the member - // KernelEnvC is updated properly depending on the state. That member is - // used for simplification of values and needs to be up to date at all - // times. - struct UpdateKernelEnvCRAII { - AAKernelInfoFunction &AA; - - UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {} - - ~UpdateKernelEnvCRAII() { - if (!AA.KernelEnvC) - return; - - ConstantStruct *ExistingKernelEnvC = - KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB); - - if (!AA.isValidState()) { - AA.KernelEnvC = ExistingKernelEnvC; - return; - } - - if (!AA.ReachedKnownParallelRegions.isValidState()) - AA.setUseGenericStateMachineOfKernelEnvironment( - KernelInfo::getUseGenericStateMachineFromKernelEnvironment( - ExistingKernelEnvC)); - - if (!AA.SPMDCompatibilityTracker.isValidState()) - AA.setExecModeOfKernelEnvironment( - KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC)); - - ConstantInt *MayUseNestedParallelismC = - KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( - AA.KernelEnvC); - ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( - MayUseNestedParallelismC->getType(), AA.NestedParallelism); - AA.setMayUseNestedParallelismOfKernelEnvironment( - NewMayUseNestedParallelismC); - } - } RAII(*this); - // Callback to check a read/write instruction. auto CheckRWInst = [&](Instruction &I) { // We handle calls later. diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll index 1826b4aea180e..ed36ebcd3aa8d 100644 --- a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll +++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll @@ -7,13 +7,13 @@ @_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { - %1 = tail call i32 @__kmpc_target_init(ptr null) + %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) ret void } -define internal i32 @__kmpc_target_init(ptr %0) { +define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) { store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 - %2 = call i1 @__kmpc_kernel_parallel() + %4 = call i1 @__kmpc_kernel_parallel() ret i32 0 } @@ -29,13 +29,13 @@ define internal i1 @__kmpc_kernel_parallel() { ; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef ;. ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) ; CHECK-NEXT: ret void ; ; ; CHECK: Function Attrs: norecurse nosync nounwind memory(write) ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 0 ; ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index f9ad6b3232de0..e081a1aa59a89 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -310,7 +310,7 @@ entry: define weak_odr void @t3() { ; CHECK-LABEL: define {{[^@]+}}@t3() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null, i8 noundef 0, i1 noundef false, i1 noundef false) ; CHECK-NEXT: br label [[USER_CODE_ENTRY:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] @@ -321,7 +321,7 @@ define weak_odr void @t3() { ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr null) + %0 = call i32 @__kmpc_target_init(ptr null, i8 0, i1 false, i1 false) br label %user_code.entry user_code.entry: ; preds = %entry @@ -335,7 +335,7 @@ for.body: ; preds = %for.cond ret void } -declare i32 @__kmpc_target_init(ptr) +declare i32 @__kmpc_target_init(ptr, i8, i1, i1) define %S.2 @t3.helper() { ; CHECK-LABEL: define {{[^@]+}}@t3.helper() { diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index e5ea89c7b522f..73d434e2062e9 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -736,9 +736,9 @@ declare i32 @__kmpc_shuffle_int32(i32, i16, i16); declare i64 @__kmpc_shuffle_int64(i64, i16, i16); -declare void @__kmpc_target_deinit(); +declare void @__kmpc_target_deinit(ptr, i8); -declare i32 @__kmpc_target_init(ptr); +declare i32 @__kmpc_target_init(ptr, i8, i1); declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32); @@ -1389,10 +1389,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; CHECK: declare i64 @__kmpc_shuffle_int64(i64, i16, i16) ; CHECK-NOT: Function Attrs -; CHECK: declare void @__kmpc_target_deinit() +; CHECK: declare void @__kmpc_target_deinit(ptr, i8) ; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_target_init(ptr) +; CHECK: declare i32 @__kmpc_target_init(ptr, i8, i1) ; CHECK-NOT: Function Attrs ; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32) @@ -2037,10 +2037,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; OPTIMISTIC: declare i64 @__kmpc_shuffle_int64(i64, i16, i16) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare void @__kmpc_target_deinit() +; OPTIMISTIC: declare void @__kmpc_target_deinit(ptr, i8) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr) +; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr, i8, i1) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32) @@ -2701,7 +2701,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; EXT: declare void @__kmpc_target_deinit(ptr, i8 signext) ; EXT-NOT: Function Attrs -; EXT: declare signext i32 @__kmpc_target_init(ptr) +; EXT: declare signext i32 @__kmpc_target_init(ptr, i8 signext, i1 signext) ; EXT-NOT: Function Attrs ; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext) diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index c571ff2308840..612c18725d3ba 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -1,27 +1,19 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes ; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1 +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" @G = external global i8 -@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } - -; Function Attrs: convergent norecurse nounwind -;. -; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8 -; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } -;. +; Function Attrs: norecurse nounwind define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK: Function Attrs: norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -32,13 +24,13 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: store i8 1, ptr @G, align 1 -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -50,7 +42,7 @@ user_code.entry: ; preds = %entry %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() store i8 %isSPMD, ptr @G call void @bar() #2 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -59,9 +51,9 @@ worker.exit: ; preds = %entry declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(ptr) +declare i32 @__kmpc_target_init(ptr, i8, i1) -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: convergent nounwind define hidden void @bar() #1 { @@ -91,17 +83,3 @@ attributes #2 = { convergent } !5 = !{i32 7, !"PIC Level", i32 2} !6 = !{i32 7, !"frame-pointer", i32 2} !7 = !{!"clang version 14.0.0"} -;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } -;. -; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} -; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"} -;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index ff311e21e4624..fe20653af18bf 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU-DISABLED @@ -120,30 +120,28 @@ ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 - -@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -151,7 +149,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -159,7 +157,7 @@ worker.exit: ; preds = %entry } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -195,14 +193,14 @@ declare void @unknown_no_openmp() #2 declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -210,7 +208,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -295,7 +293,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -303,7 +301,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -372,7 +370,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -380,7 +378,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -458,7 +456,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -466,7 +464,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -542,7 +540,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -550,7 +548,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -629,7 +627,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -637,7 +635,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -684,7 +682,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -692,7 +690,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -834,101 +832,26 @@ attributes #9 = { convergent nounwind readonly willreturn } !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} -;. -; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; AMDGPU: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; NVPTX: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; AMDGPU-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; NVPTX-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void ; ; ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (ptr [[TMP0:%.*]]) { +; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; ; @@ -947,7 +870,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -956,7 +879,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -984,7 +907,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1031,7 +954,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1108,7 +1031,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1161,7 +1084,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1255,7 +1178,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1304,7 +1227,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1380,7 +1303,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1427,7 +1350,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1503,7 +1426,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1550,7 +1473,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1626,7 +1549,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1668,7 +1591,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1734,7 +1657,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1770,7 +1693,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1888,20 +1811,20 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void ; ; ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (ptr [[TMP0:%.*]]) { +; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; ; @@ -1920,7 +1843,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -1929,7 +1852,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1957,7 +1880,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2003,7 +1926,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2080,7 +2003,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2132,7 +2055,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2226,7 +2149,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2274,7 +2197,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2350,7 +2273,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2396,7 +2319,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2472,7 +2395,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2518,7 +2441,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2594,7 +2517,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2635,7 +2558,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2701,7 +2624,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2736,7 +2659,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2854,20 +2777,20 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-DISABLED-NEXT: ret i32 0 ; ; @@ -2886,7 +2809,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -2895,7 +2818,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] ; AMDGPU-DISABLED: omp_if.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2922,14 +2845,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3005,14 +2928,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3105,14 +3028,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3187,14 +3110,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3269,14 +3192,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3351,13 +3274,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3422,13 +3345,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3546,20 +3469,20 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-DISABLED-NEXT: ret i32 0 ; ; @@ -3578,7 +3501,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -3587,7 +3510,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] ; NVPTX-DISABLED: omp_if.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3614,14 +3537,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3697,14 +3620,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3797,14 +3720,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3879,14 +3802,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3961,14 +3884,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4043,13 +3966,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4114,13 +4037,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4231,140 +4154,3 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; -;. -; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR3]] = { nounwind } -; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR10]] = { convergent nounwind } -; AMDGPU: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; AMDGPU: attributes #[[ATTR12]] = { convergent } -;. -; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR3]] = { nounwind } -; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR10]] = { convergent nounwind } -; NVPTX: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; NVPTX: attributes #[[ATTR12]] = { convergent } -;. -; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; AMDGPU-DISABLED: attributes #[[ATTR12]] = { convergent } -;. -; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind } -; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; NVPTX-DISABLED: attributes #[[ATTR12]] = { convergent } -;. -; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll index 3f60e2483d54e..d7633434c6615 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU1 -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX1 -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU2 -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU3 -; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX2 -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX3 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU +; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX ;; void p0(void); ;; void p1(void); @@ -122,29 +122,28 @@ ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 -@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -152,7 +151,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -160,7 +159,7 @@ worker.exit: ; preds = %entry } ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO. -declare i32 @__kmpc_target_init(ptr); +declare i32 @__kmpc_target_init(ptr, i8, i1); define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: @@ -194,14 +193,14 @@ declare void @unknown_no_openmp() #2 declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -209,7 +208,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -294,7 +293,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -302,7 +301,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -371,7 +370,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -379,7 +378,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -457,7 +456,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -465,7 +464,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -541,7 +540,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -549,7 +548,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -628,7 +627,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -636,7 +635,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -683,7 +682,7 @@ entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -691,7 +690,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -833,4414 +832,1376 @@ attributes #9 = { convergent nounwind readonly willreturn } !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} -;. -; AMDGPU1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; AMDGPU1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; NVPTX1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; NVPTX1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; AMDGPU2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; AMDGPU2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; AMDGPU3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; AMDGPU3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; NVPTX2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; NVPTX2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; NVPTX3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; NVPTX3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -;. -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; AMDGPU1-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU1-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU1: omp_if.then: -; AMDGPU1-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU1-NEXT: br label [[OMP_IF_END]] -; AMDGPU1: omp_if.end: -; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU1-SAME: () #[[ATTR1]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU1: omp_if.then: -; AMDGPU1-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU1-NEXT: br label [[OMP_IF_END]] -; AMDGPU1: omp_if.end: -; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; AMDGPU1-SAME: () #[[ATTR1]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU1-SAME: () #[[ATTR6]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; AMDGPU1-SAME: () #[[ATTR1]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; AMDGPU1-SAME: () #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU1: if.then: -; AMDGPU1-NEXT: br label [[RETURN:%.*]] -; AMDGPU1: if.end: -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; AMDGPU1-NEXT: br label [[RETURN]] -; AMDGPU1: return: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU1: if.then: -; AMDGPU1-NEXT: br label [[RETURN:%.*]] -; AMDGPU1: if.end: -; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; AMDGPU1-NEXT: br label [[RETURN]] -; AMDGPU1: return: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; AMDGPU1-SAME: () #[[ATTR9:[0-9]+]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU1: user_code.entry: -; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU1-NEXT: ret void -; AMDGPU1: worker.exit: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@weak_callee_empty -; AMDGPU1-SAME: () #[[ATTR6]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; AMDGPU1-SAME: () #[[ATTR6]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; AMDGPU1-SAME: () #[[ATTR1]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU1-NEXT: ret void -; -; -; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; NVPTX1-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX1-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX1: omp_if.then: -; NVPTX1-NEXT: store i32 0, ptr @G, align 4 -; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX1-NEXT: br label [[OMP_IF_END]] -; NVPTX1: omp_if.end: -; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX1-SAME: () #[[ATTR1]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX1: omp_if.then: -; NVPTX1-NEXT: store i32 0, ptr @G, align 4 -; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX1-NEXT: br label [[OMP_IF_END]] -; NVPTX1: omp_if.end: -; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p1() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; NVPTX1-SAME: () #[[ATTR1]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p1() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX1-SAME: () #[[ATTR6]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; NVPTX1-SAME: () #[[ATTR1]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p1() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p1() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p1() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; NVPTX1-SAME: () #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX1: if.then: -; NVPTX1-NEXT: br label [[RETURN:%.*]] -; NVPTX1: if.end: -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; NVPTX1-NEXT: br label [[RETURN]] -; NVPTX1: return: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX1: if.then: -; NVPTX1-NEXT: br label [[RETURN:%.*]] -; NVPTX1: if.end: -; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; NVPTX1-NEXT: br label [[RETURN]] -; NVPTX1: return: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; NVPTX1-SAME: () #[[ATTR9:[0-9]+]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX1: user_code.entry: -; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_target_deinit() -; NVPTX1-NEXT: ret void -; NVPTX1: worker.exit: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@weak_callee_empty -; NVPTX1-SAME: () #[[ATTR6]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; NVPTX1-SAME: () #[[ATTR6]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline nounwind -; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; NVPTX1-SAME: () #[[ATTR1]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @p0() #[[ATTR12]] -; NVPTX1-NEXT: ret void -; -; -; NVPTX1: Function Attrs: convergent noinline norecurse nounwind -; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX1-NEXT: entry: -; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX1-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; AMDGPU2-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU2-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU2: omp_if.then: -; AMDGPU2-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU2-NEXT: br label [[OMP_IF_END]] -; AMDGPU2: omp_if.end: -; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU2-SAME: () #[[ATTR1]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU2: omp_if.then: -; AMDGPU2-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU2-NEXT: br label [[OMP_IF_END]] -; AMDGPU2: omp_if.end: -; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; AMDGPU2-SAME: () #[[ATTR1]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU2-SAME: () #[[ATTR6]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; AMDGPU2-SAME: () #[[ATTR1]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; AMDGPU2-SAME: () #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU2: if.then: -; AMDGPU2-NEXT: br label [[RETURN:%.*]] -; AMDGPU2: if.end: -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; AMDGPU2-NEXT: br label [[RETURN]] -; AMDGPU2: return: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU2: if.then: -; AMDGPU2-NEXT: br label [[RETURN:%.*]] -; AMDGPU2: if.end: -; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; AMDGPU2-NEXT: br label [[RETURN]] -; AMDGPU2: return: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; AMDGPU2-SAME: () #[[ATTR9:[0-9]+]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU2: user_code.entry: -; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU2-NEXT: ret void -; AMDGPU2: worker.exit: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@weak_callee_empty -; AMDGPU2-SAME: () #[[ATTR6]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; AMDGPU2-SAME: () #[[ATTR6]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; AMDGPU2-SAME: () #[[ATTR1]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU2-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; AMDGPU3-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU3-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU3: omp_if.then: -; AMDGPU3-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU3-NEXT: br label [[OMP_IF_END]] -; AMDGPU3: omp_if.end: -; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU3-SAME: () #[[ATTR1]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU3: omp_if.then: -; AMDGPU3-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU3-NEXT: br label [[OMP_IF_END]] -; AMDGPU3: omp_if.end: -; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; AMDGPU3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; AMDGPU3-SAME: () #[[ATTR1]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU3-SAME: () #[[ATTR6]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; AMDGPU3-SAME: () #[[ATTR1]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; AMDGPU3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p1() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; AMDGPU3-SAME: () #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU3: if.then: -; AMDGPU3-NEXT: br label [[RETURN:%.*]] -; AMDGPU3: if.end: -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; AMDGPU3-NEXT: br label [[RETURN]] -; AMDGPU3: return: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU3: if.then: -; AMDGPU3-NEXT: br label [[RETURN:%.*]] -; AMDGPU3: if.end: -; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; AMDGPU3-NEXT: br label [[RETURN]] -; AMDGPU3: return: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; AMDGPU3-SAME: () #[[ATTR9:[0-9]+]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU3: user_code.entry: -; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_target_deinit() -; AMDGPU3-NEXT: ret void -; AMDGPU3: worker.exit: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@weak_callee_empty -; AMDGPU3-SAME: () #[[ATTR6]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; AMDGPU3-SAME: () #[[ATTR6]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; AMDGPU3-SAME: () #[[ATTR1]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @p0() #[[ATTR12]] -; AMDGPU3-NEXT: ret void -; -; -; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU3-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; NVPTX2-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX2-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX2: omp_if.then: -; NVPTX2-NEXT: store i32 0, ptr @G, align 4 -; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX2-NEXT: br label [[OMP_IF_END]] -; NVPTX2: omp_if.end: -; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX2-SAME: () #[[ATTR1]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX2: omp_if.then: -; NVPTX2-NEXT: store i32 0, ptr @G, align 4 -; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX2-NEXT: br label [[OMP_IF_END]] -; NVPTX2: omp_if.end: -; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p1() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; NVPTX2-SAME: () #[[ATTR1]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p1() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX2-SAME: () #[[ATTR6]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; NVPTX2-SAME: () #[[ATTR1]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p1() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p1() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p1() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; NVPTX2-SAME: () #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX2: if.then: -; NVPTX2-NEXT: br label [[RETURN:%.*]] -; NVPTX2: if.end: -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; NVPTX2-NEXT: br label [[RETURN]] -; NVPTX2: return: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX2: if.then: -; NVPTX2-NEXT: br label [[RETURN:%.*]] -; NVPTX2: if.end: -; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; NVPTX2-NEXT: br label [[RETURN]] -; NVPTX2: return: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; NVPTX2-SAME: () #[[ATTR9:[0-9]+]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX2: user_code.entry: -; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_target_deinit() -; NVPTX2-NEXT: ret void -; NVPTX2: worker.exit: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@weak_callee_empty -; NVPTX2-SAME: () #[[ATTR6]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; NVPTX2-SAME: () #[[ATTR6]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline nounwind -; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; NVPTX2-SAME: () #[[ATTR1]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @p0() #[[ATTR12]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX2: Function Attrs: convergent noinline norecurse nounwind -; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX2-NEXT: entry: -; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX2-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; NVPTX3-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] -; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX3-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX3: omp_if.then: -; NVPTX3-NEXT: store i32 0, ptr @G, align 4 -; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX3-NEXT: br label [[OMP_IF_END]] -; NVPTX3: omp_if.end: -; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX3-SAME: () #[[ATTR1]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX3: omp_if.then: -; NVPTX3-NEXT: store i32 0, ptr @G, align 4 -; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX3-NEXT: br label [[OMP_IF_END]] -; NVPTX3: omp_if.end: -; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12:[0-9]+]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p1() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] -; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] -; NVPTX3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; NVPTX3-SAME: () #[[ATTR1]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p1() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX3-SAME: () #[[ATTR6]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; NVPTX3-SAME: () #[[ATTR1]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p1() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p1() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR11]] -; NVPTX3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p1() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; NVPTX3-SAME: () #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX3: if.then: -; NVPTX3-NEXT: br label [[RETURN:%.*]] -; NVPTX3: if.end: -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] -; NVPTX3-NEXT: br label [[RETURN]] -; NVPTX3: return: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX3: if.then: -; NVPTX3-NEXT: br label [[RETURN:%.*]] -; NVPTX3: if.end: -; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] -; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] -; NVPTX3-NEXT: br label [[RETURN]] -; NVPTX3: return: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; NVPTX3-SAME: () #[[ATTR9:[0-9]+]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) -; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX3: user_code.entry: -; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_target_deinit() -; NVPTX3-NEXT: ret void -; NVPTX3: worker.exit: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR10]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@weak_callee_empty -; NVPTX3-SAME: () #[[ATTR6]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; NVPTX3-SAME: () #[[ATTR6]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline nounwind -; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; NVPTX3-SAME: () #[[ATTR1]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @p0() #[[ATTR12]] -; NVPTX3-NEXT: ret void -; -; -; NVPTX3: Function Attrs: convergent noinline norecurse nounwind -; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX3-NEXT: entry: -; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX3-NEXT: ret void -; -;. -; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR3]] = { nounwind } -; AMDGPU1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; AMDGPU1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU1: attributes #[[ATTR10]] = { convergent nounwind } -; AMDGPU1: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; AMDGPU1: attributes #[[ATTR12]] = { convergent } -;. -; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR3]] = { nounwind } -; NVPTX1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; NVPTX1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX1: attributes #[[ATTR10]] = { convergent nounwind } -; NVPTX1: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; NVPTX1: attributes #[[ATTR12]] = { convergent } -;. -; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR3]] = { nounwind } -; AMDGPU2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; AMDGPU2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU2: attributes #[[ATTR10]] = { convergent nounwind } -; AMDGPU2: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; AMDGPU2: attributes #[[ATTR12]] = { convergent } -;. -; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR3]] = { nounwind } -; AMDGPU3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; AMDGPU3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU3: attributes #[[ATTR10]] = { convergent nounwind } -; AMDGPU3: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; AMDGPU3: attributes #[[ATTR12]] = { convergent } -;. -; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR3]] = { nounwind } -; NVPTX2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; NVPTX2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX2: attributes #[[ATTR10]] = { convergent nounwind } -; NVPTX2: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; NVPTX2: attributes #[[ATTR12]] = { convergent } -;. -; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR3]] = { nounwind } -; NVPTX3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } -; NVPTX3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR9]] = { noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX3: attributes #[[ATTR10]] = { convergent nounwind } -; NVPTX3: attributes #[[ATTR11]] = { convergent "llvm.assume"="omp_no_openmp" } -; NVPTX3: attributes #[[ATTR12]] = { convergent } -;. -; AMDGPU1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; AMDGPU1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; AMDGPU1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; AMDGPU1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; AMDGPU1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; NVPTX1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; NVPTX1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; NVPTX1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; AMDGPU2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; AMDGPU2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; AMDGPU2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; AMDGPU3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; AMDGPU3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; AMDGPU3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; NVPTX2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; NVPTX2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; NVPTX2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. -; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} -; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} -; NVPTX3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} -; NVPTX3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} -; NVPTX3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} -; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} -; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} -; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} -; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -;. +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU: omp_if.then: +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: br label [[OMP_IF_END]] +; AMDGPU: omp_if.end: +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU: omp_if.then: +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[OMP_IF_END]] +; AMDGPU: omp_if.end: +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12:[0-9]+]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; AMDGPU-SAME: () #[[ATTR6]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU: if.then: +; AMDGPU-NEXT: br label [[RETURN:%.*]] +; AMDGPU: if.end: +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] +; AMDGPU-NEXT: br label [[RETURN]] +; AMDGPU: return: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU: if.then: +; AMDGPU-NEXT: br label [[RETURN:%.*]] +; AMDGPU: if.end: +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] +; AMDGPU-NEXT: br label [[RETURN]] +; AMDGPU: return: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; AMDGPU-SAME: () #[[ATTR9:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty +; AMDGPU-SAME: () #[[ATTR6]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; AMDGPU-SAME: () #[[ATTR6]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX: omp_if.then: +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: br label [[OMP_IF_END]] +; NVPTX: omp_if.end: +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX: omp_if.then: +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[OMP_IF_END]] +; NVPTX: omp_if.end: +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12:[0-9]+]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; NVPTX-SAME: () #[[ATTR6:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; NVPTX-SAME: () #[[ATTR6]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX: if.then: +; NVPTX-NEXT: br label [[RETURN:%.*]] +; NVPTX: if.end: +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] +; NVPTX-NEXT: br label [[RETURN]] +; NVPTX: return: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX: if.then: +; NVPTX-NEXT: br label [[RETURN:%.*]] +; NVPTX: if.end: +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] +; NVPTX-NEXT: br label [[RETURN]] +; NVPTX: return: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; NVPTX-SAME: () #[[ATTR9:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty +; NVPTX-SAME: () #[[ATTR6]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; NVPTX-SAME: () #[[ATTR6]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR12]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll index 4501a085363fd..e1b7b37f3bcb6 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -36,8 +36,6 @@ target triple = "nvptx64" ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @@ -45,24 +43,24 @@ target triple = "nvptx64" @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 @4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 +@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1 @6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 @8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1 @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 @10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 @11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 +@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1 @12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1 @13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 @G = external global i32 -@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } -@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } - +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -77,12 +75,12 @@ user_code.entry: ; preds = %entry call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit() #3, !dbg !28 + call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -101,13 +99,13 @@ entry: ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 -declare void @__kmpc_target_deinit() local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -130,7 +128,7 @@ user_code.entry: ; preds = %entry call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 call void @no_openmp() call void @no_parallelism() - call void @__kmpc_target_deinit() #3, !dbg !46 + call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll index ec95a815501f1..f76e0f66d12ca 100644 --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -5,13 +5,12 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8 -@__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null } +@__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0 +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata" declare void @use(i32) @@ -19,37 +18,37 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @2) %2 = call i32 @__kmpc_global_thread_num(ptr @2) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 2) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(ptr) +declare i32 @__kmpc_target_init(ptr, i8, i1) declare i32 @__kmpc_global_thread_num(ptr) #1 -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index 9e3b5f1a21aca..82546309e7608 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -2,27 +2,26 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -@G = external global i32 - -@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } -@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } -@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } +@kernel0_exec_mode = weak constant i8 1 +@G = external global i32 ;. +; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 -; CHECK: @[[KERNEL0_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } -; CHECK: @[[KERNEL1_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } -; CHECK: @[[KERNEL2_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } +; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[KERNEL0_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[KERNEL1_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[KERNEL2_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -32,21 +31,24 @@ define weak void @kernel0() #0 { ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment) + + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } +@kernel1_exec_mode = weak constant i8 1 + define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -54,21 +56,24 @@ define weak void @kernel1() #0 { ; CHECK-NEXT: ret void ; CHECK: main.thread.user_code: ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment) + + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @helper1() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } +@kernel2_exec_mode = weak constant i8 1 + define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -79,12 +84,12 @@ define weak void @kernel2() #0 { ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -97,7 +102,7 @@ user_code.entry: call void @helper1() call void @helper2() call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -193,8 +198,8 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() { ret i32 %ret } declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() -declare i32 @__kmpc_target_init(ptr) #1 -declare void @__kmpc_target_deinit() #1 +declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1 declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) declare i32 @__kmpc_global_thread_num(ptr) @@ -209,6 +214,7 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} !2 = !{ptr @kernel0, !"kernel", i32 1} !3 = !{ptr @kernel1, !"kernel", i32 1} !4 = !{ptr @kernel2, !"kernel", i32 1} +; ;. ; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } ; CHECK: attributes #[[ATTR1]] = { nounwind } diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll index daae783bdf305..591892f03755a 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll @@ -2,22 +2,28 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -@G = external global i32 +%struct.ident_t = type { i32, i32, i32, i32, ptr } + +@kernel0_exec_mode = weak constant i8 1 +@G = external global i32 ;. +; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 +; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() ; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr null) + %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) call void @helper0() call void @helper1() call void @helper2() @@ -25,20 +31,24 @@ define weak void @kernel0() #0 { ret void } +@kernel1_exec_mode = weak constant i8 1 + define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) ; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr null) + %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) call void @helper1() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } +@kernel2_exec_mode = weak constant i8 1 + define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { @@ -46,14 +56,14 @@ define weak void @kernel2() #0 { ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } @@ -102,8 +112,8 @@ define internal void @helper2() { } declare i32 @__kmpc_get_hardware_num_threads_in_block() -declare i32 @__kmpc_target_init(ptr) #1 -declare void @__kmpc_target_deinit() #1 +declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1 !llvm.module.flags = !{!0, !1} diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll index 92518239ed26f..a0b608b10d069 100644 --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -2,17 +2,15 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @_ZL6Device = internal global double 0.000000e+00, align 8 -@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0 define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr { entry: - %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #0 + %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -31,13 +29,13 @@ region.guarded: region.barrier: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2) - tail call void @__kmpc_target_deinit() #0 + tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0 br label %common.ret } -declare i32 @__kmpc_target_init(ptr) local_unnamed_addr +declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr -declare void @__kmpc_target_deinit() local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() { entry: @@ -80,29 +78,29 @@ attributes #1 = { convergent nounwind } ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11 ; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: ; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR0]] +; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]] +; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]] ; CHECK-NEXT: br label [[COMMON_RET]] ; ; ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]] +; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]] ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]] ; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll index 5491c7674cc7d..701c09ff05c1e 100644 --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -7,19 +7,17 @@ target triple = "nvptx64" ; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @S = external local_unnamed_addr global ptr -@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } define void @foo() { entry: - %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment) + %c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true) %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10 call void @share(ptr %0), !dbg !10 call void @__kmpc_free_shared(ptr %0) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } @@ -33,9 +31,9 @@ declare ptr @__kmpc_alloc_shared(i64) declare void @__kmpc_free_shared(ptr nocapture) -declare i32 @__kmpc_target_init(ptr); +declare i32 @__kmpc_target_init(ptr, i1, i1); -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i1) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll index b05ae97913182..29f969d23e1ff 100644 --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -36,20 +36,19 @@ %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 -@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_10301_87b2c_foo_l7() { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -57,14 +56,14 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define weak i32 @__kmpc_target_init(ptr %0) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -147,7 +146,7 @@ entry: declare i32 @__kmpc_global_thread_num(ptr) -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll index 80d0576f02234..3502ef79eaba2 100644 --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -2,36 +2,37 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } +%struct.ident_t = type { i32, i32, i32, i32, ptr } +@is_spmd_exec_mode = weak constant i8 0 +@will_be_spmd_exec_mode = weak constant i8 1 +@non_spmd_exec_mode = weak constant i8 1 +@will_not_be_spmd_exec_mode = weak constant i8 1 @G = external global i8 -@is_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null } -@will_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } -@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } -@will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } +@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata" ;. +; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8 -; CHECK: @[[IS_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null } -; CHECK: @[[WILL_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } -; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } -; CHECK: @[[WILL_NOT_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata" ;. define weak void @is_spmd() { ; CHECK-LABEL: define {{[^@]+}}@is_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @is_spmd_helper1() ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @is_spmd_helper1() call void @is_spmd_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } @@ -39,7 +40,7 @@ define weak void @will_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -48,12 +49,12 @@ define weak void @will_be_spmd() { ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %i = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -64,41 +65,41 @@ user_code.entry: %0 = call i32 @__kmpc_global_thread_num(ptr null) call void @is_spmd_helper2() call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @non_spmd() { ; CHECK-LABEL: define {{[^@]+}}@non_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @will_not_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -195,8 +196,8 @@ entry: declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable" declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(ptr) -declare void @__kmpc_target_deinit() +declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) declare i32 @__kmpc_global_thread_num(ptr) declare void @foo() diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll index d2171c43e47d6..8c56e69fda246 100644 --- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll +++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll @@ -23,31 +23,31 @@ target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +@__omp_offloading_10302_bd7e0_main_l13_exec_mode = weak protected constant i8 3 +@__omp_offloading_10302_bd7e0_main_l16_exec_mode = weak protected constant i8 1 @i_shared = internal addrspace(3) global [4 x i8] undef, align 16 @i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16 - -@__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null } -@__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } - +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3 +; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3 ; CHECK: @[[I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16 ; CHECK: @[[I_I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16 -; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null } -; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata" +; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 noundef %i) local_unnamed_addr #0 { ; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -55,7 +55,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde ; CHECK: user_code.entry: ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]] ; CHECK: region.guarded.i: @@ -67,12 +67,12 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde ; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8 ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: %captured_vars_addrs.i = alloca [1 x ptr], align 8 - %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment) #6 + %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #6 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -96,11 +96,11 @@ _Z3fooi.internalized.exit: ; preds = %user_code.entry, %r store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr %captured_vars_addrs.i, align 8 call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i) - call void @__kmpc_target_deinit() #6 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #6 br label %common.ret } -declare i32 @__kmpc_target_init(ptr) local_unnamed_addr +declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 { ; CHECK-LABEL: @_Z3fooi( @@ -131,7 +131,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde ; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -140,16 +140,29 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde ; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1]] +; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] +; CHECK: region.check.tid: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] +; CHECK: region.guarded: ; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16 +; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] +; CHECK: region.guarded.end: +; CHECK-NEXT: br label [[REGION_BARRIER]] +; CHECK: region.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: br label [[REGION_EXIT:%.*]] +; CHECK: region.exit: ; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8 ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: %captured_vars_addrs.i = alloca [1 x ptr], align 8 - %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment) #6 + %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #6 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -164,7 +177,7 @@ user_code.entry: ; preds = %entry store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr %captured_vars_addrs.i, align 8 call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i) - call void @__kmpc_target_deinit() #6 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #6 br label %common.ret } diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll index 9d950647ab8eb..8cb8abe95fc44 100644 --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -2,62 +2,66 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } +%struct.ident_t = type { i32, i32, i32, i32, ptr } +@no_spmd_exec_mode = weak constant i8 1 +@spmd_exec_mode = weak constant i8 0 +@parallel_exec_mode = weak constant i8 0 @G = external global i16 -@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } -@spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null } -@parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null } +@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata" ;. +; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i16 -; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } -; CHECK: @[[SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null } -; CHECK: @[[PARALLEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null } +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata" +; CHECK: @[[NONE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[PARALLEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. define weak void @none_spmd() { ; CHECK-LABEL: define {{[^@]+}}@none_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @none_spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @none_spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @spmd() { ; CHECK-LABEL: define {{[^@]+}}@spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } define weak void @parallel() { ; CHECK-LABEL: define {{[^@]+}}@parallel() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @spmd_helper() call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } @@ -129,8 +133,8 @@ define internal void @parallel_helper() { declare void @foo() declare void @bar() declare zeroext i16 @__kmpc_parallel_level(ptr, i32) -declare i32 @__kmpc_target_init(ptr) #1 -declare void @__kmpc_target_deinit() #1 +declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1 !llvm.module.flags = !{!0, !1} !nvvm.annotations = !{!2, !3, !4} diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 91e1066f010e7..203de81aaf29d 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -5,12 +5,6 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -@S = external local_unnamed_addr global ptr -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } - - ; UTC_ARGS: --disable ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. @@ -18,55 +12,59 @@ target triple = "nvptx64" ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. ; UTC_ARGS: --enable +@S = external local_unnamed_addr global ptr + +%struct.ident_t = type { i32, i32, i32, i32, ptr } + ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. ;. ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr -; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } +; CHECK: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. ; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr -; CHECK-DISABLED: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } +; CHECK-DISABLED: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. -define weak i32 @__kmpc_target_init(ptr %0) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLED-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) define void @kernel() { ; CHECK-LABEL: define {{[^@]+}}@kernel() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false) ; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR0]] ; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true) call void @foo() call void @bar() call void @convert_and_move_alloca() call void @unknown_no_openmp() - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ret void } diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index e79a4d141a3d7..653a338b621a6 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -14,30 +14,28 @@ target triple = "nvptx64" ; UTC_ARGS: --enable %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @S = external local_unnamed_addr global ptr @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } -@bar_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } -@baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null } +@foo_exec_mode = weak constant i8 1 +@bar_exec_mode = weak constant i8 1 +@baz_spmd_exec_mode = weak constant i8 2 define dso_local void @foo() "kernel" { entry: - %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment) + %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) call void @unknown_no_openmp() call void @use(ptr %x) call void @__kmpc_free_shared(ptr %x, i64 4) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void } define void @bar() "kernel" { - %c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment) + %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) call void @unknown_no_openmp() %cmp = icmp eq i32 %c, -1 br i1 %cmp, label %master1, label %exit @@ -56,12 +54,12 @@ master2: call void @__kmpc_free_shared(ptr %y, i64 4) br label %exit exit: - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void } define void @baz_spmd() "kernel" { - %c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment) + %c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true) call void @unknown_no_openmp() %c0 = icmp eq i32 %c, -1 br i1 %c0, label %master3, label %exit @@ -71,7 +69,7 @@ master3: call void @__kmpc_free_shared(ptr %z, i64 24) br label %exit exit: - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 2) ret void } @@ -99,11 +97,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" @@ -129,29 +127,32 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; CHECK: @[[FOO_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; CHECK: @[[BAR_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; CHECK: @[[BAZ_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 ; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef ; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef +; CHECK: @[[FOO_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[BAR_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[BAZ_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] poison, align 4 ; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 ;. ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @foo_kernel_environment) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @bar_kernel_environment) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] @@ -166,13 +167,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR3]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@baz_spmd ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @baz_kernel_environment) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] @@ -182,7 +183,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; ; @@ -210,7 +211,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ; ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ;. diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index c4e90529d812c..06433454be0b4 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -4,12 +4,10 @@ ; ModuleID = 'single_threaded_exeuction.c' %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [1 x i8] c"\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } +@kernel_exec_mode = weak constant i8 1 ; CHECK-NOT: [openmp-opt] Basic block @kernel entry is executed by a single thread. @@ -17,7 +15,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread. define void @kernel() { - %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) + %call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -25,7 +23,7 @@ if.then: if.else: br label %if.end if.end: - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -108,9 +106,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() declare void @__kmpc_kernel_prepare_parallel(ptr) -declare i32 @__kmpc_target_init(ptr) +declare i32 @__kmpc_target_init(ptr, i8, i1) -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) attributes #0 = { cold noinline } diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index b47293ea65793..2250864eb96d6 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1 -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2 -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1 -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2 +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED ;; void unknown(void); ;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable"))); @@ -93,26 +93,74 @@ %struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t } %union.kmp_cmplrdata_t = type { ptr } %struct.anon = type {} -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1 +@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 +@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 +@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1 +@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1 +@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1 +@llvm.compiler.used = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; Function Attrs: alwaysinline convergent norecurse nounwind +;. +; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 +; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 +; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +;. +; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 +; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 +; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +;. ; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 ; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -120,14 +168,22 @@ ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +;. ; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 ; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -136,96 +192,6 @@ ; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. -; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 -; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 -; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED1: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; AMDGPU-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED2: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED1: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. -; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; NVPTX-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED2: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4 -; NVPTX-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -; NVPTX-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef -;. define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { @@ -237,34 +203,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 -; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 ; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 ; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; NVPTX-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED-NEXT: ret void +; call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ret void } @@ -275,7 +223,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -284,7 +232,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug @@ -292,7 +240,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -301,230 +249,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug -; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -569,15 +303,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -621,12 +356,13 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -638,7 +374,7 @@ user_code.entry: ; preds = %entry store i32 0, ptr %.zero.addr, align 4 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -680,78 +416,6 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: -; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: -; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: -; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: -; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -769,6 +433,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -786,6 +451,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 br label %for.cond @@ -820,40 +486,18 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void +; entry: call void @unknown() #11 ret void @@ -881,46 +525,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -930,6 +534,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: @@ -939,6 +544,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -957,7 +563,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -966,7 +572,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 @@ -974,7 +580,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -983,230 +589,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1251,15 +643,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1303,12 +696,13 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1320,7 +714,7 @@ user_code.entry: ; preds = %entry store i32 0, ptr %.zero.addr, align 4 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -1367,88 +761,6 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr -; AMDGPU-DISABLED1-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR8]] -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: -; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr -; AMDGPU-DISABLED2-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR8]] -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: -; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 -; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR8]] -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: -; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 -; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR8]] -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: -; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -1469,6 +781,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -1488,6 +801,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -1524,40 +838,18 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void +; entry: call void @unknown() #11 ret void @@ -1585,46 +877,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -1634,6 +886,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; NVPTX-DISABLED-NEXT: entry: @@ -1643,6 +896,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -1662,7 +916,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -1671,7 +925,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 @@ -1679,7 +933,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -1688,230 +942,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1956,15 +996,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -2008,12 +1049,13 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2025,7 +1067,7 @@ user_code.entry: ; preds = %entry store i32 0, ptr %.zero.addr, align 4 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -2069,82 +1111,6 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: -; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: -; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: -; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: -; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -2163,6 +1129,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -2181,6 +1148,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +; entry: %captured_vars_addrs = alloca [1 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -2224,42 +1192,6 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -2268,6 +1200,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -2276,6 +1209,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 @@ -2310,54 +1244,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -2369,6 +1255,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; NVPTX-DISABLED-NEXT: entry: @@ -2380,6 +1267,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -2400,7 +1288,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -2409,7 +1297,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 @@ -2417,7 +1305,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -2426,230 +1314,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -2694,15 +1368,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -2746,12 +1421,13 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2763,7 +1439,7 @@ user_code.entry: ; preds = %entry store i32 0, ptr %.zero.addr, align 4 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -2835,86 +1511,6 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED1: for.cond: -; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED1: for.cond.cleanup: -; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: for.body: -; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; AMDGPU-DISABLED2: for.cond: -; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; AMDGPU-DISABLED2: for.cond.cleanup: -; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: for.body: -; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED1: for.cond: -; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED1: for.cond.cleanup: -; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: for.body: -; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]] -; NVPTX-DISABLED2: for.cond: -; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] -; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 -; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] -; NVPTX-DISABLED2: for.cond.cleanup: -; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: for.body: -; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -2934,6 +1530,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -2953,6 +1550,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +; entry: %captured_vars_addrs = alloca [1 x ptr], align 8 %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) @@ -2997,42 +1595,6 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -3041,6 +1603,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: @@ -3049,6 +1612,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 @@ -3083,54 +1647,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -3142,6 +1658,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; NVPTX-DISABLED-NEXT: entry: @@ -3153,6 +1670,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -3174,7 +1692,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -3212,7 +1730,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 @@ -3221,7 +1739,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -3258,202 +1776,16 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-DISABLED1-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 -; NVPTX-DISABLED2-SAME: () #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -3491,15 +1823,16 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -3536,12 +1869,13 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -3553,7 +1887,7 @@ user_code.entry: ; preds = %entry store i32 0, ptr %.zero.addr, align 4 store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -3571,40 +1905,18 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void +; entry: call void @unknown() #11 ret void @@ -3617,7 +1929,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -3663,7 +1975,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] ; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 @@ -3671,7 +1983,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -3716,229 +2028,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] ; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @__kmpc_target_deinit() +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED1-SAME: () #[[ATTR3:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED1: is_worker_check: -; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.begin: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.finished: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: worker_state_machine.is_active.check: -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID -; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED1: worker_state_machine.done.barrier: -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED1: thread.user_code.check: -; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED1: common.ret: -; AMDGPU-DISABLED1-NEXT: ret void -; AMDGPU-DISABLED1: user_code.entry: -; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED2-SAME: () #[[ATTR3:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; AMDGPU-DISABLED2: is_worker_check: -; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.begin: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.finished: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: worker_state_machine.is_active.check: -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID -; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute: -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; AMDGPU-DISABLED2: worker_state_machine.done.barrier: -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; AMDGPU-DISABLED2: thread.user_code.check: -; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; AMDGPU-DISABLED2: common.ret: -; AMDGPU-DISABLED2-NEXT: ret void -; AMDGPU-DISABLED2: user_code.entry: -; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED1-SAME: () #[[ATTR3:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED1: is_worker_check: -; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED1: worker_state_machine.begin: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED1: worker_state_machine.finished: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: worker_state_machine.is_active.check: -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID -; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED1: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED1: worker_state_machine.done.barrier: -; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED1: thread.user_code.check: -; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED1: common.ret: -; NVPTX-DISABLED1-NEXT: ret void -; NVPTX-DISABLED1: user_code.entry: -; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]] -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED2-SAME: () #[[ATTR3:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; NVPTX-DISABLED2: is_worker_check: -; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; NVPTX-DISABLED2: worker_state_machine.begin: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; NVPTX-DISABLED2: worker_state_machine.finished: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: worker_state_machine.is_active.check: -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID -; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute: -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; NVPTX-DISABLED2: worker_state_machine.parallel_region.end: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel() -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; NVPTX-DISABLED2: worker_state_machine.done.barrier: -; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; NVPTX-DISABLED2: thread.user_code.check: -; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] -; NVPTX-DISABLED2: common.ret: -; NVPTX-DISABLED2-NEXT: ret void -; NVPTX-DISABLED2: user_code.entry: -; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit() -; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; AMDGPU-DISABLED-SAME: () #[[ATTR3:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -3984,14 +2082,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; NVPTX-DISABLED-SAME: () #[[ATTR3:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -4036,11 +2135,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] +; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -4052,7 +2152,7 @@ user_code.entry: ; preds = %entry %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2) call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } @@ -4070,40 +2170,18 @@ define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR8]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. ; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. ; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void +; entry: call void @spmd_amenable() #10 ret void @@ -4141,37 +2219,23 @@ declare void @unknowni32p(ptr) #2 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (ptr [[TMP0:%.*]]) { +; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (ptr [[TMP0:%.*]]) { +; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]]) { -; AMDGPU-DISABLED1-NEXT: ret i32 0 -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]]) { -; AMDGPU-DISABLED2-NEXT: ret i32 0 -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]]) { -; NVPTX-DISABLED1-NEXT: ret i32 0 -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]]) { -; NVPTX-DISABLED2-NEXT: ret i32 0 -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-DISABLED-NEXT: ret i32 0 +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-DISABLED-NEXT: ret i32 0 +; ret i32 0 } @@ -4189,7 +2253,7 @@ declare void @spmd_amenable() #5 ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(ptr) #6 -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: alwaysinline convergent norecurse nounwind @@ -4206,40 +2270,18 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR9]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void +; entry: call void @unknown() #11 ret void @@ -4267,46 +2309,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; -; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED1-NEXT: ret void -; -; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; AMDGPU-DISABLED2-NEXT: ret void -; -; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED1-NEXT: entry: -; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED1-NEXT: ret void -; -; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper -; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; NVPTX-DISABLED2-NEXT: entry: -; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] -; NVPTX-DISABLED2-NEXT: ret void -; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED-NEXT: entry: @@ -4316,6 +2318,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void +; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper ; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; NVPTX-DISABLED-NEXT: entry: @@ -4325,6 +2328,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void +; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 @@ -4389,92 +2393,6 @@ attributes #11 = { convergent } !30 = !{!31, !27, i64 0} !31 = !{!"kmp_task_t_with_privates", !32, i64 0} !32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} -; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse } -; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse } -; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind } -; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} -; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} -; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} -; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} -; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} -; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} -; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} -; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} -; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} -; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} -; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} -; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} ;. ; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; AMDGPU: attributes #[[ATTR1]] = { norecurse } @@ -4504,61 +2422,33 @@ attributes #11 = { convergent } ; NVPTX: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } ; NVPTX: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. -; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse } -; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR5]] = { nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED1: attributes #[[ATTR9]] = { convergent } -; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED1: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -;. -; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse } -; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR5]] = { nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED2: attributes #[[ATTR9]] = { convergent } -; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED2: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } -;. -; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse } -; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR5]] = { nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED1: attributes #[[ATTR9]] = { convergent } -; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED1: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse } +; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent } +; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. -; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse } -; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR5]] = { nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED2: attributes #[[ATTR9]] = { convergent } -; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED2: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } +; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse } +; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } +; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind } +; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent } +; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. ; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -4622,127 +2512,65 @@ attributes #11 = { convergent } ; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24} ; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24} ;. -; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; AMDGPU-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0} -; AMDGPU-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; AMDGPU-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; AMDGPU-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24} -; AMDGPU-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24} -; AMDGPU-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0} -; AMDGPU-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24} -; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24} -;. -; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; AMDGPU-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0} -; AMDGPU-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; AMDGPU-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; AMDGPU-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24} -; AMDGPU-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; AMDGPU-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24} -; AMDGPU-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0} -; AMDGPU-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24} -; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24} -;. -; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; NVPTX-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0} -; NVPTX-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; NVPTX-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; NVPTX-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24} -; NVPTX-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24} -; NVPTX-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0} -; NVPTX-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; NVPTX-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24} -; NVPTX-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24} +; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} +; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} +; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} +; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} +; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} +; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} +; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} +; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} +; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} +; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} +; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} +; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} +; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} +; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} +; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} +; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} +; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} +; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} +; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} ;. -; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} -; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} -; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} -; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} -; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} -; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} -; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} -; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} -; NVPTX-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0} -; NVPTX-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0} -; NVPTX-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} -; NVPTX-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} -; NVPTX-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24} -; NVPTX-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; NVPTX-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} -; NVPTX-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24} -; NVPTX-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0} -; NVPTX-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} -; NVPTX-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24} -; NVPTX-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24} +; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} +; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} +; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} +; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} +; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} +; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} +; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} +; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} +; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} +; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} +; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} +; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} +; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} +; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} +; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} +; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} +; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} +; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} +; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 019e5ecccd723..b254513942ffa 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -13,19 +13,19 @@ target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@__omp_offloading_fd02_404433c2_main_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } - +@__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1 +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" ; Function Attrs: alwaysinline convergent norecurse nounwind ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { @@ -33,7 +33,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der ; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -56,12 +56,12 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #3 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -73,11 +73,11 @@ user_code.entry: ; preds = %entry %call.i = call double @__nv_sin(double 0x400921FB54442D18) #6 store double %call.i, ptr %x, align 8, !tbaa !8 call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3 - call void @__kmpc_target_deinit() #3 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3 br label %common.ret } -declare i32 @__kmpc_target_init(ptr) local_unnamed_addr +declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr ; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 { @@ -113,7 +113,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 ; Function Attrs: alwaysinline declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4 -declare void @__kmpc_target_deinit() local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: convergent declare double @__nv_sin(double) local_unnamed_addr #5 diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 1bf859cb6b4aa..c7c34d342964d 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -2,8 +2,8 @@ ; ; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function. ; -; CHECK: @__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null } ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode +; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false) ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode ; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode @@ -13,8 +13,6 @@ target triple = "amdgcn-amd-amdhsa" %struct.ident_t = type { i32, i32, i32, i32, ptr } %struct.DeviceEnvironmentTy = type { i32, i32, i32, i32 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } %"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] } %"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr } %"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } @@ -27,7 +25,6 @@ target triple = "amdgcn-amd-amdhsa" @__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 @__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 @omptarget_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 4 -@__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null } @IsSPMDMode = weak hidden addrspace(3) global i32 undef, align 4 @.str.12 = private unnamed_addr addrspace(4) constant [47 x i8] c"ValueRAII initialization with wrong old value!\00", align 1 @_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16 @@ -44,7 +41,7 @@ define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(i64 nou entry: %ng1 = alloca i32, align 4 %captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5) - %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr)) + %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -52,7 +49,7 @@ user_code.entry: ; preds = %entry %captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7 call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1) br label %common.ret common.ret: ; preds = %user_code.entry, %entry @@ -107,12 +104,9 @@ entry: } ; Function Attrs: convergent nounwind -; define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 { -define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment) local_unnamed_addr #9 { +define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 { entry: %0 = and i32 undef, undef - %ExecMode = getelementptr inbounds %struct.ConfigurationEnvironmentTy, ptr %KernelEnvironment, i64 0, i32 2 - %Mode = load i8, ptr %ExecMode, align 2, !tbaa !28 %1 = and i8 %Mode, 2 %tobool.not = icmp eq i8 %1, 0 br i1 %tobool.not, label %if.else, label %if.then @@ -254,8 +248,6 @@ _ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit: ; preds = %if.end if.end10: ; preds = %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit %sub.i = add nsw i32 %24, -64 %cmp = icmp ult i32 %25, %sub.i - %34 = load i8, ptr %KernelEnvironment, align 8 - %UseGenericStateMachine = icmp ne i8 %34, 0 %or.cond251 = select i1 %UseGenericStateMachine, i1 %cmp, i1 false br i1 %or.cond251, label %do.body.i, label %_ZN14DebugEntryRAIID2Ev.exit250 @@ -269,7 +261,7 @@ _ZN14DebugEntryRAIID2Ev.exit250: ; preds = %do.body.i, %if.end1 } ; Function Attrs: nounwind -define internal void @__kmpc_target_deinit() local_unnamed_addr #10 { +define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode) local_unnamed_addr #10 { ret void } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index 3830fa93252e8..bc9abf1212dfd 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -35,27 +35,29 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1 +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" @LocGlob = private unnamed_addr addrspace(5) global i32 43 -@__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } - ; Function Attrs: convergent norecurse nounwind ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" ; CHECK: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43 -; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" ; CHECK-DISABLED: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43 -; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 { @@ -66,7 +68,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) ; CHECK-NEXT: [[LOC:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: [[AL32:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: @@ -177,7 +179,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR6]] ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -190,7 +192,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) ; CHECK-DISABLED-NEXT: [[LOC:%.*]] = alloca ptr, align 8 ; CHECK-DISABLED-NEXT: [[AL32:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: is_worker_check: @@ -269,7 +271,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) ; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 1) #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void @@ -278,7 +280,7 @@ entry: %loc = alloca ptr %al32 = alloca i32 %N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32 - %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #3 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -333,7 +335,7 @@ __omp_outlined__.exit: ; preds = %for.cond.i %call14.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 %call15.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 %call16.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 - call void @__kmpc_target_deinit() #3 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3 ret void worker.exit: ; preds = %entry @@ -366,13 +368,13 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) { declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLED-NEXT: ret i32 0 ; ret i32 0 @@ -392,7 +394,7 @@ declare void @usei8ptr(ptr) #1 ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn declare void @llvm.experimental.noalias.scope.decl(metadata) #4 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index 5945a8cbf2199..17f4cf4dcdd85 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -30,42 +30,48 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1 +@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 -@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null } -; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. ; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } -; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -73,24 +79,58 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; CHECK-DISABLE-SPMDIZATION: is_worker_check: +; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() +; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] +; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] +; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin: +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null +; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished: +; CHECK-DISABLE-SPMDIZATION-NEXT: ret void +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check: +; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check: +; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.execute: +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check1: +; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end: +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel() +; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier: +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; CHECK-DISABLE-SPMDIZATION: thread.user_code.check: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @spmd_helper() #5 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -98,26 +138,26 @@ worker.exit: ; preds = %entry } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: convergent noinline norecurse nounwind define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -128,7 +168,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -136,24 +176,24 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @generic_helper() #5 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -177,7 +217,7 @@ define internal void @spmd_helper() #1 { ; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]] ; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll index 35a47d9ee423c..291024780790a 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll @@ -31,42 +31,47 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1 +@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external addrspace(5) global i32, align 4 -@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4 -; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } -; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ;. ; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4 -; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } -; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -75,7 +80,7 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLE-SPMDIZATION: is_worker_check: @@ -113,19 +118,19 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @spmd_helper() #5 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -133,19 +138,19 @@ worker.exit: ; preds = %entry } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_target_deinit() +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: convergent noinline norecurse nounwind define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { @@ -153,7 +158,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: is_worker_check: @@ -187,7 +192,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @generic_helper() #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -196,7 +201,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLE-SPMDIZATION: is_worker_check: @@ -230,19 +235,19 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit() +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @generic_helper() #5 - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll index fa900e60d4315..ff5ce93f66b98 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll @@ -38,8 +38,6 @@ target triple = "nvptx64" ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } @0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @@ -47,25 +45,24 @@ target triple = "nvptx64" @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 @4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 +@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1 @6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 @8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1 @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 @10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 @11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 +@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1 @12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1 @13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 @G = external global i32 - -@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } -@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } - +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -80,11 +77,11 @@ user_code.entry: ; preds = %entry call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit() #3, !dbg !28 + call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } -define weak i32 @__kmpc_target_init(ptr) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -104,13 +101,13 @@ entry: ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 -declare void @__kmpc_target_deinit() local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -132,7 +129,7 @@ user_code.entry: ; preds = %entry call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 call void @spmd_amenable() - call void @__kmpc_target_deinit() #3, !dbg !46 + call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll index a8cffb935b916..65ee949da7fc6 100644 --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -4,29 +4,32 @@ target triple = "amdgcn-amd-amdhsa" -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @G = internal addrspace(3) global i32 undef, align 4 @H = internal addrspace(3) global i32 undef, align 4 @X = internal addrspace(3) global i32 undef, align 4 @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1 -@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } ; Make sure we do not delete the stores to @G without also replacing the load with `1`. ;. -; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 -; CHECK: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 -; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 -; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null } +; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 +; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +;. +; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 +; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 ;. define void @kernel() "kernel" { ; ; CHECK: Function Attrs: norecurse ; CHECK-LABEL: define {{[^@]+}}@kernel ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: @@ -44,10 +47,10 @@ define void @kernel() "kernel" { ; CHECK-NEXT: call void @barrier() #[[ATTR6]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: call void @__kmpc_target_deinit() +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1) ; CHECK-NEXT: ret void ; - %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment) + %call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -72,7 +75,7 @@ if.then2: call void @barrier(); br label %if.end if.end: - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr undef, i8 1) ret void } @@ -144,8 +147,8 @@ define void @sync_def() { declare void @sync() declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier" declare void @use1(i32) nosync norecurse nounwind nocallback -declare i32 @__kmpc_target_init(ptr) nocallback -declare void @__kmpc_target_deinit() nocallback +declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback +declare void @__kmpc_target_deinit(ptr, i8) nocallback declare void @llvm.assume(i1) !llvm.module.flags = !{!0, !1} diff --git a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll index a991c827812fb..3014f5bc9921a 100644 --- a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll +++ b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll @@ -1,22 +1,21 @@ ; RUN: opt -passes='default' -pass-remarks-missed=openmp-opt < %s 2>&1 | FileCheck %s --check-prefix=MODULE target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } -%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [13 x i8] c"Alloc Shared\00", align 1 + @S = external local_unnamed_addr global ptr -@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } ; MODULE: remark: openmp_opt_module.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. define void @foo() { entry: - %i = call i32 @__kmpc_target_init(ptr @foo_kernel_environment) + %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true, i1 true) %x = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10 call void @use(ptr %x) call void @__kmpc_free_shared(ptr %x) - call void @__kmpc_target_deinit() + call void @__kmpc_target_deinit(ptr null, i1 false, i1 true) ret void } @@ -32,8 +31,8 @@ entry: declare ptr @_Z10SafeMallocmPKc(i64 %size, ptr nocapture readnone %msg) declare void @__kmpc_free_shared(ptr) -declare i32 @__kmpc_target_init(ptr) -declare void @__kmpc_target_deinit() +declare i32 @__kmpc_target_init(ptr, i1, i1 %use_generic_state_machine, i1) +declare void @__kmpc_target_deinit(ptr, i1, i1) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index b55b236457a1a..8419b87ef6222 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -126,7 +126,6 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden -nocudalib -nogpulib -nostdinc -fopenmp -fopenmp-cuda-mode -Wno-unknown-cuda-version - -DOMPTARGET_DEVICE_RUNTIME -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h index 00e18c9b5650f..128572dfec606 100644 --- a/openmp/libomptarget/DeviceRTL/include/Debug.h +++ b/openmp/libomptarget/DeviceRTL/include/Debug.h @@ -50,6 +50,8 @@ void __assert_fail(const char *assertion, const char *file, unsigned line, struct DebugEntryRAII { DebugEntryRAII(const char *File, const unsigned Line, const char *Function); ~DebugEntryRAII(); + + static void init(); }; #endif diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h index 8b6019b9dc2ab..648da49b86f55 100644 --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -214,14 +214,12 @@ uint32_t __kmpc_get_warp_size(); /// Kernel /// ///{ -// Forward declaration -struct KernelEnvironmentTy; - int8_t __kmpc_is_spmd_exec_mode(); -int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment); +int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, + bool UseGenericStateMachine); -void __kmpc_target_deinit(); +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode); ///} diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h index 04af48db5620e..aac5a2275fcac 100644 --- a/openmp/libomptarget/DeviceRTL/include/State.h +++ b/openmp/libomptarget/DeviceRTL/include/State.h @@ -17,9 +17,6 @@ #include "Types.h" #include "Utils.h" -// Forward declaration. -struct KernelEnvironmentTy; - #pragma omp begin declare target device_type(nohost) namespace ompx { @@ -116,10 +113,7 @@ extern ThreadStateTy **ThreadStates; #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc) /// Initialize the state machinery. Must be called by all threads. -void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment); - -/// Return the kernel environment associated with the current kernel. -KernelEnvironmentTy &getKernelEnvironment(); +void init(bool IsSPMD); /// TODO enum ValueKind { diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp index 2548198ffc682..ceccef625ed29 100644 --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "Configuration.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "State.h" #include "Types.h" @@ -53,9 +53,7 @@ bool config::isDebugMode(config::DebugKind Kind) { bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; } bool config::mayUseNestedParallelism() { - if (__omp_rtl_assume_no_nested_parallelism) - return false; - return state::getKernelEnvironment().Configuration.MayUseNestedParallelism; + return !__omp_rtl_assume_no_nested_parallelism; } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp index 5a7bf80551b63..a1b289e830227 100644 --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -12,10 +12,8 @@ #include "Debug.h" #include "Configuration.h" -#include "Environment.h" #include "Interface.h" #include "Mapping.h" -#include "State.h" #include "Types.h" using namespace ompx; @@ -33,14 +31,15 @@ void __assert_fail(const char *assertion, const char *file, unsigned line, } } +/// Current indentation level for the function trace. Only accessed by thread 0. +__attribute__((loader_uninitialized)) static uint32_t Level; +#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc) + DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line, const char *Function) { if (config::isDebugMode(config::DebugKind::FunctionTracing) && mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { - uint16_t &Level = - state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel; - for (int I = 0; I < Level; ++I) PRINTF("%s", " "); @@ -52,11 +51,10 @@ DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line, DebugEntryRAII::~DebugEntryRAII() { if (config::isDebugMode(config::DebugKind::FunctionTracing) && - mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { - uint16_t &Level = - state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel; + mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) Level--; - } } +void DebugEntryRAII::init() { Level = 0; } + #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp index 9ab1b5f57be4d..fa774afe469b7 100644 --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "Debug.h" -#include "Environment.h" #include "Interface.h" #include "Mapping.h" #include "State.h" @@ -24,12 +23,11 @@ using namespace ompx; #pragma omp begin declare target device_type(nohost) -static void inititializeRuntime(bool IsSPMD, - KernelEnvironmentTy &KernelEnvironment) { +static void inititializeRuntime(bool IsSPMD) { // Order is important here. synchronize::init(IsSPMD); mapping::init(IsSPMD); - state::init(IsSPMD, KernelEnvironment); + state::init(IsSPMD); } /// Simple generic state machine for worker threads. @@ -69,17 +67,16 @@ extern "C" { /// /// \param Ident Source location identification, can be NULL. /// -int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) { +int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, + bool UseGenericStateMachine) { FunctionTracingRAII(); - ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration; - bool IsSPMD = Configuration.ExecMode & - llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; - bool UseGenericStateMachine = Configuration.UseGenericStateMachine; + const bool IsSPMD = + Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; if (IsSPMD) { - inititializeRuntime(/* IsSPMD */ true, KernelEnvironment); + inititializeRuntime(/* IsSPMD */ true); synchronize::threadsAligned(atomic::relaxed); } else { - inititializeRuntime(/* IsSPMD */ false, KernelEnvironment); + inititializeRuntime(/* IsSPMD */ false); // No need to wait since only the main threads will execute user // code and workers will run into a barrier right away. } @@ -111,7 +108,7 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) { // thread's warp, so none of its threads can ever be active worker threads. if (UseGenericStateMachine && mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) { - genericStateMachine(KernelEnvironment.Ident); + genericStateMachine(Ident); } else { // Retrieve the work function just to ensure we always call // __kmpc_kernel_parallel even if a custom state machine is used. @@ -135,10 +132,11 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) { /// /// \param Ident Source location identification, can be NULL. /// -void __kmpc_target_deinit() { +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) { FunctionTracingRAII(); - bool IsSPMD = mapping::isSPMDMode(); - state::assumeInitialState(IsSPMD); + const bool IsSPMD = + Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + if (IsSPMD) return; diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index 09d024d17d2f8..9c1c9abaf493e 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "State.h" +#include "Configuration.h" #include "Debug.h" -#include "Environment.h" #include "Interface.h" #include "Mapping.h" #include "Synchronization.h" @@ -34,9 +34,6 @@ constexpr const uint32_t Alignment = 16; extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment))); #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc) -/// The kernel environment passed to the init method by the compiler. -static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr); - namespace { /// Fallback implementations are missing to trigger a link time error. @@ -244,19 +241,15 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal, } // namespace -void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) { +void state::init(bool IsSPMD) { SharedMemorySmartStack.init(IsSPMD); if (mapping::isInitialThreadInLevel0(IsSPMD)) { TeamState.init(IsSPMD); + DebugEntryRAII::init(); ThreadStates = nullptr; - KernelEnvironmentPtr = &KernelEnvironment; } } -KernelEnvironmentTy &state::getKernelEnvironment() { - return *KernelEnvironmentPtr; -} - void state::enterDataEnvironment(IdentTy *Ident) { ASSERT(config::mayUseThreadStates() && "Thread state modified while explicitly disabled!"); diff --git a/openmp/libomptarget/include/DeviceEnvironment.h b/openmp/libomptarget/include/DeviceEnvironment.h new file mode 100644 index 0000000000000..231492c68f762 --- /dev/null +++ b/openmp/libomptarget/include/DeviceEnvironment.h @@ -0,0 +1,25 @@ +//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Global device environment +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ +#define _OMPTARGET_DEVICE_ENVIRONMENT_H_ + +// deviceRTL uses and DeviceRTL uses explicit definitions + +struct DeviceEnvironmentTy { + uint32_t DebugKind; + uint32_t NumDevices; + uint32_t DeviceNum; + uint32_t DynamicMemSize; +}; + +#endif diff --git a/openmp/libomptarget/include/Environment.h b/openmp/libomptarget/include/Environment.h deleted file mode 100644 index 5eb590908660e..0000000000000 --- a/openmp/libomptarget/include/Environment.h +++ /dev/null @@ -1,61 +0,0 @@ -//===------------ Environment.h - OpenMP GPU environments --------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Environments shared between host and device. -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_ENVIRONMENT_H_ -#define _OMPTARGET_ENVIRONMENT_H_ - -#ifdef OMPTARGET_DEVICE_RUNTIME -#include "Types.h" -#else -#include "SourceInfo.h" - -#include - -using IdentTy = ident_t; -#endif - -#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" - -struct DeviceEnvironmentTy { - uint32_t DebugKind; - uint32_t NumDevices; - uint32_t DeviceNum; - uint32_t DynamicMemSize; -}; - -// NOTE: Please don't change the order of those members as their indices are -// used in the middle end. Always add the new data member at the end. -// Different from KernelEnvironmentTy below, this structure contains members -// that might be modified at runtime. -struct DynamicEnvironmentTy { - /// Current indentation level for the function trace. Only accessed by thread - /// 0. - uint16_t DebugIndentionLevel; -}; - -// NOTE: Please don't change the order of those members as their indices are -// used in the middle end. Always add the new data member at the end. -struct ConfigurationEnvironmentTy { - uint8_t UseGenericStateMachine; - uint8_t MayUseNestedParallelism; - llvm::omp::OMPTgtExecModeFlags ExecMode; -}; - -// NOTE: Please don't change the order of those members as their indices are -// used in the middle end. Always add the new data member at the end. -struct KernelEnvironmentTy { - ConfigurationEnvironmentTy Configuration; - IdentTy *Ident; - DynamicEnvironmentTy *DynamicEnv; -}; - -#endif // _OMPTARGET_ENVIRONMENT_H_ diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 59e69f858ed7c..f9b0371f903a8 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -21,7 +21,7 @@ #include #include "Debug.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "Utilities.h" diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index 1e551e5ba16d6..7ebc1d1092cd5 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -582,45 +582,32 @@ Error GenericDeviceTy::registerKernelOffloadEntry( return Plugin::success(); } -Expected -GenericDeviceTy::getKernelEnvironmentForKernel(StringRef Name, - DeviceImageTy &Image) { - // Create a metadata object for the kernel environment object. - StaticGlobalTy KernelEnv(Name.data(), - "_kernel_environment"); - - // Retrieve kernel environment object for the kernel. +Expected +GenericDeviceTy::getExecutionModeForKernel(StringRef Name, + DeviceImageTy &Image) { + // Create a metadata object for the exec mode global (auto-generated). + StaticGlobalTy ExecModeGlobal(Name.data(), + "_exec_mode"); + + // Retrieve execution mode for the kernel. This may fail since some kernels + // may not have an execution mode. GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); - if (auto Err = GHandler.readGlobalFromImage(*this, Image, KernelEnv)) { + if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) { // Consume the error since it is acceptable to fail. [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failed to read kernel environment object for '%s': %s\n", Name.data(), - ErrStr.data()); - - return createStringError(inconvertibleErrorCode(), ErrStr); - } + DP("Failed to read execution mode for '%s': %s\n" + "Using default SPMD (2) execution mode\n", + Name.data(), ErrStr.data()); - return KernelEnv.getValue(); -} - -Expected -GenericDeviceTy::getExecutionModeForKernel(StringRef Name, - DeviceImageTy &Image) { - auto KernelEnvOrError = getKernelEnvironmentForKernel(Name, Image); - if (!KernelEnvOrError) { - (void)KernelEnvOrError.takeError(); return OMP_TGT_EXEC_MODE_SPMD; } - auto &KernelEnv = *KernelEnvOrError; - auto ExecMode = KernelEnv.Configuration.ExecMode; - // Check that the retrieved execution mode is valid. - if (!GenericKernelTy::isValidExecutionMode(ExecMode)) - return Plugin::error("Invalid execution mode %d for '%s'", ExecMode, - Name.data()); + if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) + return Plugin::error("Invalid execution mode %d for '%s'", + ExecModeGlobal.getValue(), Name.data()); - return ExecMode; + return ExecModeGlobal.getValue(); } Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr, diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h index 2b64b6f9571c8..a91ea81183c21 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -19,7 +19,7 @@ #include #include "Debug.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "GlobalHandler.h" #include "JIT.h" #include "MemoryManager.h" @@ -748,11 +748,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Map of host pinned allocations used for optimize device transfers. PinnedAllocationMapTy PinnedAllocs; - -private: - /// Return the kernel environment object for kernel \p Name. - Expected - getKernelEnvironmentForKernel(StringRef Name, DeviceImageTy &Image); }; /// Class implementing common functionalities of offload plugins. Each plugin diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index 34c25099a016e..9e38d851196c0 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "GlobalHandler.h" #include "PluginInterface.h" diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp index d56789b7609de..a9b828826b1bb 100644 --- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "omptarget.h" diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 370b764e969bc..d25a520326b9a 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -37,7 +37,7 @@ #include "internal.h" #include "rt.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include "print_tracing.h" diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index b9d2a82c92fb3..871047494c27a 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -23,7 +23,7 @@ #include #include "Debug.h" -#include "Environment.h" +#include "DeviceEnvironment.h" #include "omptarget.h" #include "omptargetplugin.h"